1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4##############################################################################
5# Defines
6
# Exit code the kselftest framework treats as "test skipped".
ksft_skip=4

# Tunables. A value already present in the environment is kept; otherwise the
# default below is assigned. The configuration file sourced further down can
# still override any of them.
: "${PING:=ping}"
: "${PING6:=ping6}"
: "${MZ:=mausezahn}"
: "${ARPING:=arping}"
: "${TEAMD:=teamd}"
: "${WAIT_TIME:=5}"
: "${PAUSE_ON_FAIL:=no}"
: "${PAUSE_ON_CLEANUP:=no}"
: "${NETIF_TYPE:=veth}"
: "${NETIF_CREATE:=yes}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
: "${PING_COUNT:=10}"
: "${PING_TIMEOUT:=5}"
: "${WAIT_TIMEOUT:=20}"
: "${INTERFACE_TIMEOUT:=600}"
: "${LOW_AGEING_TIME:=1000}"
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
: "${STABLE_MAC_ADDRS:=no}"
: "${TCPDUMP_EXTRA_FLAGS:=}"
33
# Directory part of the path this file was sourced by; "." when that path
# has no directory component.
case "$BASH_SOURCE" in
*/*)
	relative_path="${BASH_SOURCE%/*}"
	;;
*)
	relative_path="."
	;;
esac

# Pull in the optional configuration file living next to this library.
if [[ -f "$relative_path/forwarding.config" ]]; then
	source "$relative_path/forwarding.config"
fi
42
43##############################################################################
44# Sanity checks
45
# Exit the script with $ksft_skip unless "tc -j" succeeds, i.e. unless the
# installed tc understands the JSON output flag.
check_tc_version()
{
	if ! tc -j &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing JSON support"
		exit $ksft_skip
	fi
}
54
# Old versions of tc don't understand "mpls_uc".
#
# Probe for it by adding a throw-away matchall filter on the ingress of
# device $1. Returns $ksft_skip (after printing a SKIP line) when the filter
# cannot be added; otherwise removes the probe filter again.
check_tc_mpls_support()
{
	local netdev=$1; shift

	if ! tc filter add dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		matchall action pipe &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing MPLS support"
		return $ksft_skip
	fi

	tc filter del dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		matchall
}
69
# Old versions of tc produce invalid json output for mpls lse statistics.
#
# Installs a flower filter matching on "mpls lse depth 2" on the ingress of
# device $1, feeds the JSON dump of that filter through jq, and removes the
# filter again. Returns $ksft_skip when either the filter cannot be added or
# jq rejects the dump.
check_tc_mpls_lse_stats()
{
	local netdev=$1; shift
	local json_rc

	if ! tc filter add dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		flower mpls lse depth 2                                    \
		action continue &> /dev/null; then
		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
		return $ksft_skip
	fi

	# Remember jq's verdict; the probe filter has to go away either way.
	tc -j filter show dev $netdev ingress protocol mpls_uc | jq . &> /dev/null
	json_rc=$?
	tc filter del dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		flower

	if ((json_rc != 0)); then
		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
		return $ksft_skip
	fi
}
95
# Exit with $ksft_skip unless the output of "tc filter help" mentions
# "block".
check_tc_shblock_support()
{
	if ! tc filter help 2>&1 | grep block &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing shared block support"
		exit $ksft_skip
	fi
}

# Exit with $ksft_skip unless the output of "tc help" mentions "chain".
check_tc_chain_support()
{
	if ! tc help 2>&1 | grep chain &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing chain support"
		exit $ksft_skip
	fi
}

# Exit with $ksft_skip unless the output of "tc actions help" mentions
# "hw_stats".
check_tc_action_hw_stats_support()
{
	if ! tc actions help 2>&1 | grep -q hw_stats; then
		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
		exit $ksft_skip
	fi
}

# Exit with $ksft_skip unless the output of "ethtool --help" mentions
# "lanes".
check_ethtool_lanes_support()
{
	if ! ethtool --help 2>&1 | grep lanes &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing lanes support"
		exit $ksft_skip
	fi
}
131
# Return $ksft_skip (after printing a SKIP line) unless the detailed bridge
# link dump contains " locked".
check_locked_port_support()
{
	bridge -d link show | grep -q " locked" && return 0

	echo "SKIP: iproute2 too old; Locked port feature not supported."
	return $ksft_skip
}
139
# Skip everything unless running as uid 0.
if (($(id -u) != 0)); then
	echo "SKIP: need root privileges"
	exit $ksft_skip
fi

# The tc JSON probe runs only when CHECK_TC is "yes".
if [[ $CHECK_TC == yes ]]; then
	check_tc_version
fi
148
# Exit with $ksft_skip unless "command -v $1" resolves to an executable
# file.
require_command()
{
	local tool=$1; shift
	local resolved

	resolved=$(command -v "$tool")
	if [[ -x "$resolved" ]]; then
		return 0
	fi

	echo "SKIP: $tool not installed"
	exit $ksft_skip
}
158
# External tools; each requirement applies only while the matching REQUIRE_*
# knob is "yes".
if [[ $REQUIRE_JQ == yes ]]; then
	require_command jq
fi
if [[ $REQUIRE_MZ == yes ]]; then
	require_command $MZ
fi
if [[ $REQUIRE_MTOOLS == yes ]]; then
	# https://github.com/vladimiroltean/mtools/
	# patched for IPv6 support
	require_command msend
	require_command mreceive
fi

# The script sourcing this library has to say how many interfaces it uses.
if [[ ! -v NUM_NETIFS ]]; then
	echo 'SKIP: importer does not define "NUM_NETIFS"'
	exit $ksft_skip
fi
176
177##############################################################################
178# Command line options handling
179
# Positional arguments, when present, replace NETIFS wholesale: argument N is
# stored as NETIFS[pN]. All arguments are consumed.
count=0

if [[ $# -gt 0 ]]; then
	unset NETIFS
	declare -A NETIFS
fi

while [[ $# -gt 0 ]]; do
	count=$((count + 1))
	NETIFS[p$count]="$1"
	shift
done
191
192##############################################################################
193# Network interfaces configuration
194
# Walk NETIFS two entries at a time (p1/p2, p3/p4, ...) and create a veth
# pair for every odd-numbered interface that does not exist yet. Exits with
# status 1 if a pair cannot be created.
create_netif_veth()
{
	local i
	local j

	for ((i = 1; i <= NUM_NETIFS; i += 2)); do
		j=$((i + 1))

		# The first end already exists: leave this pair alone.
		if ip link show dev ${NETIFS[p$i]} &> /dev/null; then
			continue
		fi

		if ! ip link add ${NETIFS[p$i]} type veth \
			peer name ${NETIFS[p$j]}; then
			echo "Failed to create netif"
			exit 1
		fi
	done
}
214
# Create the test interfaces according to $NETIF_TYPE. Only "veth" is
# supported; any other type prints an error and exits with status 1.
create_netif()
{
	case "$NETIF_TYPE" in
	veth)
		create_netif_veth
		;;
	*)
		# Plain single quotes: a backslash before ' is not an escape
		# inside double quotes and would be printed literally.
		echo "Can not create interfaces of type '$NETIF_TYPE'"
		exit 1
		;;
	esac
}
225
# Original MAC address of every interface touched by mac_addr_prepare(),
# keyed by interface name.
declare -A MAC_ADDR_ORIG

# Save the current MAC address of each of the first NUM_NETIFS interfaces in
# MAC_ADDR_ORIG and replace it with 00:01:02:03:04:<index in hex>.
mac_addr_prepare()
{
	local new_addr=
	local dev=
	# Keep the loop counter local so a caller's "i" is not clobbered.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		new_addr=$(printf "00:01:02:03:04:%02x" $i)

		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
		# Strip quotes
		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
		ip link set dev $dev address $new_addr
	done
}
242
# Put back the MAC addresses recorded in MAC_ADDR_ORIG on each of the first
# NUM_NETIFS interfaces.
mac_addr_restore()
{
	local dev=
	# Keep the loop counter local so a caller's "i" is not clobbered.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
	done
}
252
# Create the interfaces first (when asked to), then pin their MAC addresses
# (when asked to).
if [[ $NETIF_CREATE == yes ]]; then
	create_netif
fi

if [[ $STABLE_MAC_ADDRS == yes ]]; then
	mac_addr_prepare
fi

# Every interface the importer asked for has to exist by now.
for ((i = 1; i <= NUM_NETIFS; ++i)); do
	if ! ip link show dev ${NETIFS[p$i]} &> /dev/null; then
		echo "SKIP: could not find all required interfaces"
		exit $ksft_skip
	fi
done
268
269##############################################################################
270# Helpers
271
# Exit status to return at the end. Set in case one of the tests fails.
EXIT_STATUS=0
# Per-test return value. Clear at the beginning of each test.
RET=0

# check_err ERR [MSG]
# Record ERR in RET and MSG in retmsg when ERR is non-zero, unless RET
# already holds an earlier failure.
check_err()
{
	local err=$1
	local msg=$2

	if [[ $RET -ne 0 || $err -eq 0 ]]; then
		return 0
	fi

	RET=$err
	retmsg=$msg
}

# check_fail ERR [MSG]
# Inverse of check_err: a zero ERR is the failure. Sets RET to 1 and retmsg
# to MSG, unless RET already holds an earlier failure.
check_fail()
{
	local err=$1
	local msg=$2

	if [[ $RET -ne 0 || $err -ne 0 ]]; then
		return 0
	fi

	RET=1
	retmsg=$msg
}

# check_err_fail SHOULD_FAIL ERR WHAT
# Dispatch to check_fail when SHOULD_FAIL is non-zero and to check_err
# otherwise, building the message from WHAT.
check_err_fail()
{
	local should_fail=$1; shift
	local err=$1; shift
	local what=$1; shift

	if ((! should_fail)); then
		check_err $err "$what failed"
		return
	fi

	check_fail $err "$what succeeded, but should have failed"
}
311
# log_test NAME [OPT]
# Print the verdict line for the test that just ran, based on RET.
#
# OPT, when passed, is appended to NAME in parentheses. On failure
# EXIT_STATUS is set to 1, retmsg (if any) is printed on its own line, and
# with PAUSE_ON_FAIL=yes the user is prompted; answering "q" exits with
# status 1. Returns 1 on failure and 0 on success.
log_test()
{
	local test_name=$1
	local opt_str=$2
	# Local, so the prompt's answer does not leak into the caller's scope.
	local answer

	if [[ $# -eq 2 ]]; then
		opt_str="($opt_str)"
	fi

	if [[ $RET -ne 0 ]]; then
		EXIT_STATUS=1
		printf "TEST: %-60s  [FAIL]\n" "$test_name $opt_str"
		if [[ -n "$retmsg" ]]; then
			printf "\t%s\n" "$retmsg"
		fi
		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
			echo "Hit enter to continue, 'q' to quit"
			# -r: take the answer literally, backslashes included.
			read -r answer
			[ "$answer" = "q" ] && exit 1
		fi
		return 1
	fi

	printf "TEST: %-60s  [ OK ]\n" "$test_name $opt_str"
	return 0
}
338
# log_test_skip NAME [OPT]
# Print a [SKIP] verdict line for NAME followed by OPT. Always returns 0.
log_test_skip()
{
	local label="$1 $2"

	printf "TEST: %-60s  [SKIP]\n" "$label"
	return 0
}

# log_info MSG
# Print MSG prefixed with "INFO: ".
log_info()
{
	local text=$1

	printf '%s\n' "INFO: $text"
}
354
# busywait TIMEOUT_MS CMD [ARGS...]
# Re-run CMD back to back until it succeeds or more than TIMEOUT_MS
# milliseconds have passed since the first attempt. The stdout of the last
# attempt is printed (without a trailing newline). Returns 0 when CMD
# succeeded and 1 on timeout.
busywait()
{
	local timeout=$1; shift
	local began
	local now
	local out

	began=$(date -u +%s%3N)
	while :; do
		if out=$("$@"); then
			echo -n "$out"
			return 0
		fi

		now=$(date -u +%s%3N)
		if ((now - began > timeout)); then
			echo -n "$out"
			return 1
		fi
	done
}
377
# not CMD [ARGS...]
# Run CMD and invert its verdict: returns 0 when CMD fails, 1 when it
# succeeds.
not()
{
	if "$@"; then
		return 1
	fi

	return 0
}
383
# get_max N...
# Print the largest of the integer arguments.
get_max()
{
	local arr=("$@")
	# Both were globals before and overwrote any "max"/"cur" of the caller.
	local max=${arr[0]}
	local cur

	for cur in "${arr[@]}"; do
		if [[ $cur -gt $max ]]; then
			max=$cur
		fi
	done

	echo "$max"
}
397
# grep_bridge_fdb ADDR [self|master [-v]] CMD [ARGS...]
# Run CMD and keep only the output lines that contain ADDR. With "self" or
# "master", additionally keep only lines containing that word; a following
# "-v" inverts that second match.
grep_bridge_fdb()
{
	local addr=$1; shift
	local word
	local flag

	if [ "$1" == "self" ] || [ "$1" == "master" ]; then
		word=$1; shift
		if [ "$1" == "-v" ]; then
			flag=$1; shift
		fi
	fi

	# "$@" keeps the command's arguments exactly as passed. $flag stays
	# unquoted on purpose so that it vanishes when empty; an empty $word
	# makes the last grep match every line.
	"$@" | grep -- "$addr" | grep $flag -- "$word"
}
413
# wait_for_port_up CMD [ARGS...]
# Succeed when the output of CMD contains "Link detected: yes".
wait_for_port_up()
{
	local marker="Link detected: yes"

	"$@" | grep -q "$marker"
}

# wait_for_offload CMD [ARGS...]
# Succeed when the output of CMD contains "offload".
wait_for_offload()
{
	local marker=offload

	"$@" | grep -q "$marker"
}

# wait_for_trap CMD [ARGS...]
# Succeed when the output of CMD contains "trap".
wait_for_trap()
{
	local marker=trap

	"$@" | grep -q "$marker"
}
428
# until_counter_is EXPR CMD [ARGS...]
# Run CMD to read a counter, print its value, and succeed when
# "<value> EXPR" holds arithmetically (EXPR is e.g. ">= 10").
until_counter_is()
{
	local expr=$1; shift
	local reading

	reading=$("$@")
	echo $((reading))
	((reading $expr))
}
437
# busywait_for_counter TIMEOUT DELTA CMD [ARGS...]
# Read the counter once with CMD, then busy-wait (see busywait) until the
# counter has grown by at least DELTA over that first reading.
busywait_for_counter()
{
	local timeout=$1; shift
	local delta=$1; shift
	local base
	local goal

	base=$("$@")
	goal=$((base + delta))
	busywait "$timeout" until_counter_is ">= $goal" "$@"
}
446
# setup_wait_dev DEV [WAIT_TIME]
# Wait for DEV to come up, allowing up to $INTERFACE_TIMEOUT iterations of
# setup_wait_dev_with_timeout. WAIT_TIME defaults to $WAIT_TIME. If the
# interface does not come up, log a failure and exit with status 1.
setup_wait_dev()
{
	local dev=$1
	local wait_time=${2:-$WAIT_TIME}

	if ! setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time; then
		check_err 1
		log_test setup_wait_dev ": Interface $dev does not come up."
		exit 1
	fi
}
460
# setup_wait_dev_with_timeout DEV [MAX_ITERATIONS [WAIT_TIME]]
# Poll DEV once per second, at most MAX_ITERATIONS times (default
# $WAIT_TIMEOUT), until "ip link show" reports "state UP". Once it does,
# sleep WAIT_TIME more seconds (default $WAIT_TIME) and return 0. Returns 1
# when the iterations run out.
setup_wait_dev_with_timeout()
{
	local dev=$1; shift
	local max_iterations=${1:-$WAIT_TIMEOUT}; shift
	local wait_time=${1:-$WAIT_TIME}; shift
	local attempt

	for ((attempt = 1; attempt <= $max_iterations; ++attempt)); do
		if ip link show dev $dev up \
			| grep 'state UP' &> /dev/null; then
			sleep $wait_time
			return 0
		fi

		sleep 1
	done

	return 1
}
481
# setup_wait [NUM_NETIFS]
# Wait for the first NUM_NETIFS interfaces (default $NUM_NETIFS) to come up,
# then sleep $WAIT_TIME seconds.
setup_wait()
{
	local num_netifs=${1:-$NUM_NETIFS}
	local i=1

	while ((i <= num_netifs)); do
		setup_wait_dev ${NETIFS[p$i]} 0
		i=$((i + 1))
	done

	# Make sure links are ready.
	sleep $WAIT_TIME
}
494
495cmd_jq()
496{
497	local cmd=$1
498	local jq_exp=$2
499	local jq_opts=$3
500	local ret
501	local output
502
503	output="$($cmd)"
504	# it the command fails, return error right away
505	ret=$?
506	if [[ $ret -ne 0 ]]; then
507		return $ret
508	fi
509	output=$(echo $output | jq -r $jq_opts "$jq_exp")
510	ret=$?
511	if [[ $ret -ne 0 ]]; then
512		return $ret
513	fi
514	echo $output
515	# return success only in case of non-empty output
516	[ ! -z "$output" ]
517}
518
# lldpad_app_wait_set DEV
# Poll lldptool every 5 seconds until its APP listing for DEV no longer
# mentions "pending" or "unknown".
lldpad_app_wait_set()
{
	local dev=$1; shift

	until ! lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
		echo "$dev: waiting for lldpad to push pending APP updates"
		sleep 5
	done
}

# Fixed 5 second pause after deleting APP entries.
lldpad_app_wait_del()
{
	# Give lldpad a chance to push down the changes. If the device is downed
	# too soon, the updates will be left pending. However, they will have
	# been struck off the lldpad's DB already, so we won't be able to tell
	# they are pending. Then on next test iteration this would cause
	# weirdness as newly-added APP rules conflict with the old ones,
	# sometimes getting stuck in an "unknown" state.
	sleep 5
}
539
# Hook to run before a test's cleanup: optionally wait for the user
# (PAUSE_ON_CLEANUP=yes) and restore the original MAC addresses
# (STABLE_MAC_ADDRS=yes).
pre_cleanup()
{
	if [[ ${PAUSE_ON_CLEANUP} == yes ]]; then
		echo "Pausing before cleanup, hit any key to continue"
		read
	fi

	if [[ $STABLE_MAC_ADDRS == yes ]]; then
		mac_addr_restore
	fi
}
551
# For IPv4 and then IPv6: add a rule for the local table at preference 32765
# and delete the rule at preference 0.
vrf_prepare()
{
	local family

	for family in -4 -6; do
		ip $family rule add pref 32765 table local
		ip $family rule del pref 0
	done
}

# Undo vrf_prepare, IPv6 first: re-add the local table rule at preference 0
# and delete the one at preference 32765.
vrf_cleanup()
{
	local family

	for family in -6 -4; do
		ip $family rule add pref 0 table local
		ip $family rule del pref 32765
	done
}
567
# Last routing table ID handed out, and the ID assigned to each VRF name.
__last_tb_id=0
declare -A __TB_IDS

# __vrf_td_id_assign VRF
# Allocate the next table ID for VRF and remember it in __TB_IDS.
# The ID is passed back as the function's exit status, so it can only carry
# values 0-255.
__vrf_td_id_assign()
{
	local vrf_name=$1

	__TB_IDS[$vrf_name]=$((++__last_tb_id))
	return $__last_tb_id
}

# __vrf_td_id_lookup VRF
# Pass back, as exit status, the table ID previously assigned to VRF.
__vrf_td_id_lookup()
{
	local vrf_name=$1
	local tb_id=${__TB_IDS[$vrf_name]}

	return $tb_id
}
586
# vrf_create VRF
# Create VRF device VRF bound to a freshly assigned routing table, and add
# an unreachable default route (metric 4278198272) to that table for IPv4
# and IPv6.
vrf_create()
{
	local vrf_name=$1
	local tb_id
	local family

	__vrf_td_id_assign $vrf_name
	tb_id=$?

	ip link add dev $vrf_name type vrf table $tb_id
	for family in -4 -6; do
		ip $family route add table $tb_id unreachable default metric 4278198272
	done
}

# vrf_destroy VRF
# Remove the unreachable default routes from VRF's table (IPv6 first) and
# delete the VRF device.
vrf_destroy()
{
	local vrf_name=$1
	local tb_id
	local family

	__vrf_td_id_lookup $vrf_name
	tb_id=$?

	for family in -6 -4; do
		ip $family route del table $tb_id unreachable default metric 4278198272
	done
	ip link del dev $vrf_name
}
612
# __addr_add_del IF_NAME add|del [ADDR...]
# Run "ip address add|del ADDR dev IF_NAME" for every ADDR.
__addr_add_del()
{
	local if_name=$1
	local add_del=$2
	local array
	# Local, so iterating does not overwrite a caller's "addrstr".
	local addrstr

	shift
	shift
	array=("${@}")

	for addrstr in "${array[@]}"; do
		# $addrstr is deliberately unquoted: it is split into words
		# before being handed to ip.
		ip address $add_del $addrstr dev $if_name
	done
}
627
# __simple_if_init IF_NAME VRF [ADDR...]
# Enslave IF_NAME to VRF, bring it up and add the given addresses.
__simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=$1; shift

	ip link set dev $if_name master $vrf_name
	ip link set dev $if_name up

	__addr_add_del $if_name add "$@"
}

# __simple_if_fini IF_NAME [ADDR...]
# Remove the given addresses from IF_NAME, bring it down and release it from
# its master.
__simple_if_fini()
{
	local if_name=$1; shift

	__addr_add_del $if_name del "$@"

	ip link set dev $if_name down
	ip link set dev $if_name nomaster
}
650
# simple_if_init IF_NAME [ADDR...]
# Create a VRF named "v<IF_NAME>", bring it up, and put IF_NAME into it with
# the given addresses.
simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	vrf_create $vrf_name
	ip link set dev $vrf_name up
	__simple_if_init $if_name $vrf_name "$@"
}

# simple_if_fini IF_NAME [ADDR...]
# Tear down what simple_if_init set up: deconfigure IF_NAME, then destroy
# the "v<IF_NAME>" VRF.
simple_if_fini()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	__simple_if_fini $if_name "$@"
	vrf_destroy $vrf_name
}
679
# tunnel_create NAME TYPE LOCAL REMOTE [EXTRA...]
# Add a tunnel device NAME of the given TYPE between LOCAL and REMOTE,
# passing any EXTRA arguments on to "ip link add", and bring it up.
tunnel_create()
{
	local name=$1; shift
	local type=$1; shift
	local local_addr=$1; shift
	local remote_addr=$1; shift

	ip link add name $name type $type \
	   local $local_addr remote $remote_addr "$@"
	ip link set dev $name up
}

# tunnel_destroy NAME
# Delete the device NAME.
tunnel_destroy()
{
	local tundev=$1; shift

	ip link del dev $tundev
}
698
# vlan_create IF_NAME VID [VRF [ADDR...]]
# Create VLAN device "<IF_NAME>.<VID>" on top of IF_NAME, enslave it to VRF
# when VRF is non-empty, bring it up and add the given addresses.
vlan_create()
{
	local if_name=$1; shift
	local vid=$1; shift
	local vrf=$1; shift
	local name=$if_name.$vid

	ip link add name $name link $if_name type vlan id $vid
	if [[ -n "$vrf" ]]; then
		ip link set dev $name master $vrf
	fi
	ip link set dev $name up
	__addr_add_del $name add "$@"
}

# vlan_destroy IF_NAME VID
# Delete VLAN device "<IF_NAME>.<VID>".
vlan_destroy()
{
	local if_name=$1; shift
	local vid=$1; shift

	ip link del dev $if_name.$vid
}
723
# team_create IF_NAME MODE [PORT...]
# Start a $TEAMD daemon for team device IF_NAME with runner MODE, enslave
# every PORT to it (each port is downed, enslaved and brought back up) and
# bring the team device up. Skips the test if $TEAMD is not installed.
team_create()
{
	local if_name=$1; shift
	local mode=$1; shift
	# Local, so iterating does not overwrite a caller's "slave".
	local slave

	require_command $TEAMD
	$TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
	for slave in "$@"; do
		ip link set dev $slave down
		ip link set dev $slave master $if_name
		ip link set dev $slave up
	done
	ip link set dev $if_name up
}

# team_destroy IF_NAME
# Ask the $TEAMD daemon serving IF_NAME to terminate.
team_destroy()
{
	local if_name=$1; shift

	$TEAMD -t $if_name -k
}
745
# master_name_get IF_NAME
# Print the "master" field of IF_NAME as reported by "ip -j link show".
master_name_get()
{
	local netdev=$1

	ip -j link show dev $netdev \
		| jq -r '.[]["master"]'
}
752
# link_stats_get IF_NAME DIR STAT
# Print counter STAT of direction DIR (as used by the callers here: "rx" or
# "tx") from the stats64 section of "ip -j -s link show".
link_stats_get()
{
	local if_name=$1; shift
	local dir=$1; shift
	local stat=$1; shift
	local filter='.[]["stats64"]["'$dir'"]["'$stat'"]'

	ip -j -s link show dev $if_name \
		| jq "$filter"
}

# link_stats_tx_packets_get IF_NAME
# Print the number of packets transmitted on IF_NAME.
link_stats_tx_packets_get()
{
	link_stats_get $1 tx packets
}

# link_stats_rx_errors_get IF_NAME
# Print the number of receive errors on IF_NAME.
link_stats_rx_errors_get()
{
	link_stats_get $1 rx errors
}
772
# tc_rule_stats_get DEV PREF [DIR [SELECTOR]]
# Print a statistic of the actions of the filter with preference PREF on
# DEV. DIR defaults to "ingress"; SELECTOR is a jq path appended to ".stats"
# and defaults to ".packets".
tc_rule_stats_get()
{
	local dev=$1
	local pref=$2
	local dir=${3:-ingress}
	local selector=${4:-.packets}

	tc -j -s filter show dev $dev $dir pref $pref \
	    | jq ".[1].options.actions[].stats$selector"
}

# tc_rule_handle_stats_get ID HANDLE [SELECTOR]
# Print a statistic of the first action of the filter with the given HANDLE.
# ID is whatever "tc filter show" needs to locate the filters (passed
# through word-split). SELECTOR defaults to ".packets".
tc_rule_handle_stats_get()
{
	local id=$1
	local handle=$2
	local selector=${3:-.packets}

	tc -j -s filter show $id \
	    | jq ".[] | select(.options.handle == $handle) | \
		  .options.actions[0].stats$selector"
}
794
# ethtool_stats_get DEV STAT
# Print the value of the first "ethtool -S" line for DEV whose name is STAT
# (the text after the colon, leading blank included).
ethtool_stats_get()
{
	local netdev=$1; shift
	local counter=$1; shift

	ethtool -S $netdev \
		| grep "^ *$counter:" \
		| head -n 1 \
		| cut -d: -f2
}
802
# qdisc_stats_get DEV HANDLE SELECTOR
# Print the jq expression SELECTOR evaluated on the qdisc of DEV whose
# "handle" equals HANDLE.
qdisc_stats_get()
{
	local dev=$1; shift
	local handle=$1; shift
	local selector=$1; shift
	local filter='.[] | select(.handle == "'"$handle"'") | '"$selector"

	tc -j -s qdisc show dev "$dev" \
	    | jq "$filter"
}

# qdisc_parent_stats_get DEV PARENT SELECTOR
# Same as qdisc_stats_get, but picks the qdisc by its "parent" and includes
# invisible qdiscs in the dump.
qdisc_parent_stats_get()
{
	local dev=$1; shift
	local parent=$1; shift
	local selector=$1; shift
	local filter='.[] | select(.parent == "'"$parent"'") | '"$selector"

	tc -j -s qdisc show dev "$dev" invisible \
	    | jq "$filter"
}
822
# ipv6_stats_get DEV STAT
# Print the second tab-separated field of the lines in
# /proc/net/dev_snmp6/DEV that start with STAT.
ipv6_stats_get()
{
	local netdev=$1; shift
	local counter=$1; shift

	cat /proc/net/dev_snmp6/$netdev \
		| grep "^$counter" \
		| cut -f2
}
830
# hw_stats_get SUITE IF_NAME DIR STAT
# Print counter STAT of direction DIR from the stats64 section of
# "ip stats show" for IF_NAME, group "offload", subgroup SUITE.
hw_stats_get()
{
	local suite=$1; shift
	local if_name=$1; shift
	local dir=$1; shift
	local stat=$1; shift
	local filter=".[0].stats64.$dir.$stat"

	ip -j stats show dev $if_name group offload subgroup $suite |
		jq "$filter"
}
841
# humanize SPEED
# Print SPEED (in bits per second) scaled down by powers of 1024 to the
# largest unit among bps, Kbps, Mbps and Gbps that keeps the number at or
# above 1; values beyond the Gbps range stay expressed in Gbps.
humanize()
{
	local speed=$1; shift
	# Local, so iterating does not overwrite a caller's "unit".
	local unit

	for unit in bps Kbps Mbps Gbps; do
		# Gbps is the last unit we can name: never divide past it, or
		# the number and the unit printed below stop matching.
		if [[ $unit == Gbps ]] || (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${unit}"
}
856
# rate T0 T1 INTERVAL
# Print 8 * (T1 - T0) / INTERVAL, using integer arithmetic.
rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local bits=$((8 * (t1 - t0)))

	echo $((bits / interval))
}

# packets_rate T0 T1 INTERVAL
# Print (T1 - T0) / INTERVAL, using integer arithmetic.
packets_rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local delta=$((t1 - t0))

	echo $((delta / interval))
}
874
# mac_get IF_NAME
# Print the "address" field of IF_NAME as reported by "ip -j link show".
mac_get()
{
	local netdev=$1

	ip -j link show dev $netdev \
		| jq -r '.[]["address"]'
}

# ipv6_lladdr_get IF_NAME
# Print the first address of IF_NAME whose scope is "link".
ipv6_lladdr_get()
{
	local netdev=$1

	ip -j addr show dev $netdev \
		| jq -r '.[]["addr_info"][] | select(.scope == "link").local' \
		| head -n 1
}
890
# bridge_ageing_time_get BRIDGE
# Print the ageing time of BRIDGE in seconds.
bridge_ageing_time_get()
{
	local bridge=$1
	local raw

	raw=$(ip -j -d link show dev $bridge \
	      | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')

	# Need to divide by 100 to convert to seconds.
	echo $((raw / 100))
}
901
# Value each sysctl key had right before sysctl_set() last changed it.
declare -A SYSCTL_ORIG

# sysctl_set KEY VALUE
# Remember the current value of KEY in SYSCTL_ORIG, then set it to VALUE.
sysctl_set()
{
	local key=$1; shift
	local value=$1; shift
	local previous

	previous=$(sysctl -n $key)
	SYSCTL_ORIG[$key]=$previous
	sysctl -qw $key=$value
}

# sysctl_restore KEY
# Write back the value that sysctl_set saved for KEY.
sysctl_restore()
{
	local key=$1; shift
	local saved=${SYSCTL_ORIG["$key"]}

	sysctl -qw $key=$saved
}
918
# Turn on IPv4 and IPv6 forwarding for all interfaces, saving the previous
# settings through sysctl_set.
forwarding_enable()
{
	local proto

	for proto in ipv4 ipv6; do
		sysctl_set net.$proto.conf.all.forwarding 1
	done
}

# Restore the forwarding settings saved by forwarding_enable, IPv6 first.
forwarding_restore()
{
	local proto

	for proto in ipv6 ipv4; do
		sysctl_restore net.$proto.conf.all.forwarding
	done
}
930
# MTU each device had right before mtu_set() last changed it.
declare -A MTU_ORIG

# mtu_set DEV MTU
# Remember the current MTU of DEV in MTU_ORIG, then set it to MTU.
mtu_set()
{
	local dev=$1; shift
	local mtu=$1; shift
	local previous

	previous=$(ip -j link show dev $dev | jq -e '.[].mtu')
	MTU_ORIG["$dev"]=$previous
	ip link set dev $dev mtu $mtu
}

# mtu_restore DEV
# Write back the MTU that mtu_set saved for DEV.
mtu_restore()
{
	local dev=$1; shift
	local saved=${MTU_ORIG["$dev"]}

	ip link set dev $dev mtu $saved
}
947
# tc_offload_check [NUM_NETIFS]
# Return 0 if "ethtool -k" reports "hw-tc-offload: on" for each of the first
# NUM_NETIFS interfaces (default $NUM_NETIFS), 1 otherwise.
tc_offload_check()
{
	local num_netifs=${1:-$NUM_NETIFS}
	# Keep the loop counter local so a caller's "i" is not clobbered.
	local i

	for ((i = 1; i <= num_netifs; ++i)); do
		if ! ethtool -k ${NETIFS[p$i]} \
			| grep "hw-tc-offload: on" &> /dev/null; then
			return 1
		fi
	done

	return 0
}
962
# trap_install DEV DIRECTION
# Install a flower filter at preference 1 on DEV in the given DIRECTION:
# a hardware-only (skip_sw) trap if that can be added, otherwise a
# "continue" action.
trap_install()
{
	local dev=$1; shift
	local direction=$1; shift

	# Some devices may not support or need in-hardware trapping of traffic
	# (e.g. the veth pairs that this library creates for non-existent
	# loopbacks). Use continue instead, so that there is a filter in there
	# (some tests check counters), and so that other filters are still
	# processed.
	if ! tc filter add dev $dev $direction pref 1 \
		flower skip_sw action trap 2>/dev/null; then
		tc filter add dev $dev $direction pref 1 \
			flower action continue
	fi
}

# trap_uninstall DEV DIRECTION
# Remove the flower filter at preference 1 that trap_install added.
trap_uninstall()
{
	local dev=$1; shift
	local direction=$1; shift

	tc filter del dev $dev $direction pref 1 flower
}
986
# slow_path_trap_install DEV DIRECTION
# Install a trap (see trap_install) only when $tcflags contains "skip_hw".
slow_path_trap_install()
{
	# For slow-path testing, we need to install a trap to get to
	# slow path the packets that would otherwise be switched in HW.
	if [[ $tcflags == *skip_hw* ]]; then
		trap_install "$@"
	fi
}

# slow_path_trap_uninstall DEV DIRECTION
# Remove the trap again, under the same $tcflags condition.
slow_path_trap_uninstall()
{
	if [[ $tcflags == *skip_hw* ]]; then
		trap_uninstall "$@"
	fi
}
1002
# __icmp_capture_add_del add|del PREF VSUF DEV [FILTER]
# Add or delete an ingress flower filter on DEV matching ICMP ("" as VSUF)
# or ICMPv6 ("v6" as VSUF) packets, at preference PREF, with action "pass".
# FILTER holds extra flower match keys and is word-split on purpose.
__icmp_capture_add_del()
{
	local add_del=$1; shift
	local pref=$1; shift
	local vsuf=$1; shift
	local tundev=$1; shift
	local filter=$1; shift

	tc filter $add_del dev "$tundev" ingress \
	   proto ip$vsuf pref $pref \
	   flower ip_proto icmp$vsuf $filter \
	   action pass
}

# icmp_capture_install DEV [FILTER]
# Install the ICMP capture filter at preference 100.
icmp_capture_install()
{
	__icmp_capture_add_del add 100 "" "$@"
}

# icmp_capture_uninstall DEV [FILTER]
# Remove the ICMP capture filter at preference 100.
icmp_capture_uninstall()
{
	__icmp_capture_add_del del 100 "" "$@"
}

# icmp6_capture_install DEV [FILTER]
# Install the ICMPv6 capture filter at preference 100.
icmp6_capture_install()
{
	__icmp_capture_add_del add 100 v6 "$@"
}

# icmp6_capture_uninstall DEV [FILTER]
# Remove the ICMPv6 capture filter at preference 100.
icmp6_capture_uninstall()
{
	__icmp_capture_add_del del 100 v6 "$@"
}
1036
# __vlan_capture_add_del add|del PREF DEV [FILTER]
# Add or delete an ingress flower filter for 802.1q frames on DEV at
# preference PREF, with action "pass". FILTER holds extra flower match keys
# and is word-split on purpose.
__vlan_capture_add_del()
{
	local add_del=$1; shift
	local pref=$1; shift
	local dev=$1; shift
	local filter=$1; shift

	tc filter $add_del dev "$dev" ingress \
	   proto 802.1q pref $pref \
	   flower $filter \
	   action pass
}

# vlan_capture_install DEV [FILTER]
# Install the VLAN capture filter at preference 100.
vlan_capture_install()
{
	__vlan_capture_add_del add 100 "$@"
}

# vlan_capture_uninstall DEV [FILTER]
# Remove the VLAN capture filter at preference 100.
vlan_capture_uninstall()
{
	__vlan_capture_add_del del 100 "$@"
}
1059
# __dscp_capture_add_del add|del DEV BASE
# Add or delete eight software-only ICMP capture filters on DEV, one per
# DSCP value BASE..BASE+7. The filter for a given DSCP sits at preference
# DSCP + 100 and matches ip_tos DSCP << 2.
__dscp_capture_add_del()
{
	local add_del=$1; shift
	local dev=$1; shift
	local base=$1; shift
	local dscp;
	# Local, so iterating does not overwrite a caller's "prio".
	local prio

	for prio in {0..7}; do
		dscp=$((base + prio))
		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
				       "skip_hw ip_tos $((dscp << 2))"
	done
}

# dscp_capture_install DEV BASE
# Install the capture filters for DSCP values BASE..BASE+7 on DEV.
dscp_capture_install()
{
	local dev=$1; shift
	local base=$1; shift

	__dscp_capture_add_del add $dev $base
}

# dscp_capture_uninstall DEV BASE
# Remove the capture filters for DSCP values BASE..BASE+7 from DEV.
dscp_capture_uninstall()
{
	local dev=$1; shift
	local base=$1; shift

	__dscp_capture_add_del del $dev $base
}
1089
# dscp_fetch_stats DEV BASE
# For each DSCP value BASE..BASE+7 print a line "[<dscp>]=<count> ", where
# <count> is what tc_rule_stats_get reports for the filter at preference
# <dscp> + 100 on DEV.
dscp_fetch_stats()
{
	local dev=$1; shift
	local base=$1; shift
	local dscp
	local t
	# Local, so iterating does not overwrite a caller's "prio".
	local prio

	for prio in {0..7}; do
		dscp=$((base + prio))
		t=$(tc_rule_stats_get $dev $((dscp + 100)))
		echo "[$dscp]=$t "
	done
}
1101
# matchall_sink_create DEV
# Attach a clsact qdisc to DEV and add an ingress matchall filter at
# preference 10000 that drops every packet.
matchall_sink_create()
{
	local sink_dev=$1; shift

	tc qdisc add dev $sink_dev clsact
	tc filter add dev $sink_dev ingress \
	   pref 10000 matchall action drop
}
1112
# Run, in order, every test named in $TESTS, or in $ALL_TESTS when TESTS is
# unset or empty. Each name is invoked as a command.
tests_run()
{
	local current_test
	local -a queue

	queue=(${TESTS:-$ALL_TESTS})
	for current_test in "${queue[@]}"; do
		$current_test
	done
}
1121
# multipath_eval DESC WEIGHT_RP12 WEIGHT_RP13 PACKETS_RP12 PACKETS_RP13
# Compare how traffic was actually split between two nexthops against the
# split their weights call for, and log the verdict under DESC.
#
# Both ratios are computed heavier-weight-over-lighter-weight. The test
# fails if either packet count is zero, or if the measured ratio deviates
# from the expected one by more than 15% of the expected ratio.
multipath_eval()
{
	local desc="$1"
	local weight_rp12=$2
	local weight_rp13=$3
	local packets_rp12=$4
	local packets_rp13=$5
	local weights_ratio packets_ratio diff
	local w_num w_den p_num p_den

	RET=0

	# Orient both fractions the same way, by weight.
	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		w_num=$weight_rp12; w_den=$weight_rp13
		p_num=$packets_rp12; p_den=$packets_rp13
	else
		w_num=$weight_rp13; w_den=$weight_rp12
		p_num=$packets_rp13; p_den=$packets_rp12
	fi

	weights_ratio=$(echo "scale=2; $w_num / $w_den" \
			| bc -l)

	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
		check_err 1 "Packet difference is 0"
		log_test "Multipath"
		log_info "Expected ratio $weights_ratio"
		return
	fi

	packets_ratio=$(echo "scale=2; $p_num / $p_den" \
			| bc -l)

	# Absolute difference between the two ratios.
	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
	diff=${diff#-}

	[ "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0 ]
	check_err $? "Too large discrepancy between expected and measured ratios"
	log_test "$desc"
	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
}
1164
# in_ns NETNS CMD [ARGS...]
# Run CMD inside network namespace NETNS, in a fresh bash that first sets
# NUM_NETIFS=0 and sources lib.sh. Each word of the command is shell-quoted
# with printf %q before being written into the script fed to that bash.
in_ns()
{
	local netns=$1; shift

	ip netns exec $netns bash <<-EOF
		NUM_NETIFS=0
		source lib.sh
		$(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done)
	EOF
}
1175
1176##############################################################################
1177# Tests
1178
# ping_do IF_NAME DIP [ARGS]
# Ping DIP with $PING from within the VRF that IF_NAME is enslaved to:
# $PING_COUNT packets 0.1 s apart, giving up after $PING_TIMEOUT seconds.
# ARGS is an optional string of extra ping options (word-split). All output
# is discarded; the exit status is that of the ping run.
ping_do()
{
	local if_name=$1
	local dip=$2
	local args=$3
	local vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name \
		$PING $args $dip -c $PING_COUNT -i 0.1 \
		-w $PING_TIMEOUT &> /dev/null
}

# ping_test IF_NAME DIP [SUFFIX]
# Run ping_do as a test case logged as "ping<SUFFIX>".
ping_test()
{
	local rc

	RET=0

	ping_do $1 $2
	rc=$?
	check_err $rc
	log_test "ping$3"
}
1200
# ping6_do IF_NAME DIP [ARGS]
# Ping DIP with $PING6 from within the VRF that IF_NAME is enslaved to:
# $PING_COUNT packets 0.1 s apart, giving up after $PING_TIMEOUT seconds.
# ARGS is an optional string of extra ping options (word-split). All output
# is discarded; the exit status is that of the ping run.
ping6_do()
{
	local if_name=$1
	local dip=$2
	local args=$3
	local vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name \
		$PING6 $args $dip -c $PING_COUNT -i 0.1 \
		-w $PING_TIMEOUT &> /dev/null
}

# ping6_test IF_NAME DIP [SUFFIX]
# Run ping6_do as a test case logged as "ping6<SUFFIX>".
ping6_test()
{
	local rc

	RET=0

	ping6_do $1 $2
	rc=$?
	check_err $rc
	log_test "ping6$3"
}
1222
# learning_test BRIDGE BR_PORT1 HOST1_IF HOST2_IF
# Exercise FDB learning on BR_PORT1 (the bridge port facing HOST1_IF) and
# log the result as "FDB learning". In order, the test checks that:
#  - no FDB entry for the test MAC exists up front;
#  - with flooding off on the port, a packet to the MAC sent from HOST2_IF
#    does not reach HOST1_IF;
#  - a packet sourced from the MAC on HOST1_IF creates the FDB entry, after
#    which a packet to the MAC does reach HOST1_IF;
#  - the entry is gone 10 seconds past the bridge's ageing time;
#  - with learning off on the port, no entry is created.
# Learning and flooding are switched back on at the end.
learning_test()
{
	local bridge=$1
	local br_port1=$2	# Connected to `host1_if`.
	local host1_if=$3
	local host2_if=$4
	local mac=de:ad:be:ef:13:37
	local ageing_time
	# jq selects nothing (and exits non-zero under -e) unless the FDB dump
	# has an entry for $mac ...
	local fdb_entry=".[] | select(.mac == \"$mac\")"
	# ... or unless filter 101 has counted exactly one packet.
	local one_hit=".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)"

	RET=0

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_entry" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Disable unknown unicast flooding on `br_port1` to make sure
	# packets are only forwarded through the port after a matching
	# FDB entry was installed.
	bridge link set dev $br_port1 flood off

	ip link set $host1_if promisc on
	tc qdisc add dev $host1_if ingress
	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e "$one_hit" &> /dev/null
	check_fail $? "Packet reached first host when should not"

	# Let the bridge learn the MAC on `br_port1`.
	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_entry" &> /dev/null
	check_err $? "Did not find FDB record when should"

	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e "$one_hit" &> /dev/null
	check_err $? "Packet did not reach second host when should"

	# Wait for 10 seconds after the ageing time to make sure FDB
	# record was aged-out.
	ageing_time=$(bridge_ageing_time_get $bridge)
	sleep $((ageing_time + 10))

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_entry" &> /dev/null
	check_fail $? "Found FDB record when should not"

	bridge link set dev $br_port1 learning off

	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_entry" &> /dev/null
	check_fail $? "Found FDB record when should not"

	bridge link set dev $br_port1 learning on

	# Undo the capture setup on `host1_if`.
	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host1_if ingress
	ip link set $host1_if promisc off

	bridge link set dev $br_port1 flood on

	log_test "FDB learning"
}
1299
# flood_test_do SHOULD_FLOOD MAC IP HOST1_IF HOST2_IF
# Send one packet addressed to MAC/IP from HOST1_IF and check whether it
# arrived at HOST2_IF. SHOULD_FLOOD is "true" or "false"; the function
# returns 0 when the observation matches it and 1 otherwise.
flood_test_do()
{
	local should_flood=$1
	local mac=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local err=0
	local seen_rc

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was flooded to it or not.
	ip link set $host2_if promisc on
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
	sleep 1

	# jq -e succeeds only if the ACL counted exactly one packet.
	tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	seen_rc=$?

	if [[ $seen_rc -ne 0 && $should_flood == "true" ]]; then
		err=1
	elif [[ $seen_rc -eq 0 && $should_flood == "false" ]]; then
		err=1
	fi

	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress
	ip link set $host2_if promisc off

	return $err
}
1333
# flood_unicast_test BR_PORT HOST1_IF HOST2_IF
# Check that a unicast packet to an unknown MAC is flooded through BR_PORT
# only while its "flood" flag is on. Leaves the flag on. Logged as
# "Unknown unicast flood".
flood_unicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local mac=de:ad:be:ef:13:37
	local ip=192.0.2.100
	local rc

	RET=0

	bridge link set dev $br_port flood off
	flood_test_do false $mac $ip $host1_if $host2_if
	rc=$?
	check_err $rc "Packet flooded when should not"

	bridge link set dev $br_port flood on
	flood_test_do true $mac $ip $host1_if $host2_if
	rc=$?
	check_err $rc "Packet was not flooded when should"

	log_test "Unknown unicast flood"
}

# flood_multicast_test BR_PORT HOST1_IF HOST2_IF
# Check that a packet to an unregistered multicast group is flooded through
# BR_PORT only while its "mcast_flood" flag is on. Leaves the flag on.
# Logged as "Unregistered multicast flood".
flood_multicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local mac=01:00:5e:00:00:01
	local ip=239.0.0.1
	local rc

	RET=0

	bridge link set dev $br_port mcast_flood off
	flood_test_do false $mac $ip $host1_if $host2_if
	rc=$?
	check_err $rc "Packet flooded when should not"

	bridge link set dev $br_port mcast_flood on
	flood_test_do true $mac $ip $host1_if $host2_if
	rc=$?
	check_err $rc "Packet was not flooded when should"

	log_test "Unregistered multicast flood"
}

# flood_test BR_PORT HOST1_IF HOST2_IF
# Run the unicast and then the multicast flood test.
flood_test()
{
	# `br_port` is connected to `host2_if`
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local variant

	for variant in flood_unicast_test flood_multicast_test; do
		$variant $br_port $host1_if $host2_if
	done
}
1390
# __start_traffic PKTSIZE PROTO H_IN SIP DIP DMAC [EXTRA...]
# Start $MZ in the background, sending an endless stream (-c 0) of PROTO
# packets of PKTSIZE bytes out of H_IN from SIP to DIP/DMAC, then sleep one
# second. EXTRA arguments are appended to the $MZ command line.
__start_traffic()
{
	local pktsize=$1; shift
	local proto=$1; shift
	local h_in=$1; shift    # Where the traffic egresses the host
	local sip=$1; shift
	local dip=$1; shift
	local dmac=$1; shift

	$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
		-a own -b $dmac -t "$proto" -q "$@" &
	sleep 1
}

# start_traffic_pktsize PKTSIZE H_IN SIP DIP DMAC [EXTRA...]
# Start a background UDP stream with the given packet size.
start_traffic_pktsize()
{
	local pktsize=$1; shift

	__start_traffic $pktsize udp "$@"
}

# start_tcp_traffic_pktsize PKTSIZE H_IN SIP DIP DMAC [EXTRA...]
# Start a background TCP stream with the given packet size.
start_tcp_traffic_pktsize()
{
	local pktsize=$1; shift

	__start_traffic $pktsize tcp "$@"
}

# start_traffic H_IN SIP DIP DMAC [EXTRA...]
# Start a background UDP stream of 8000-byte packets.
start_traffic()
{
	local pktsize=8000

	start_traffic_pktsize $pktsize "$@"
}

# start_tcp_traffic H_IN SIP DIP DMAC [EXTRA...]
# Start a background TCP stream of 8000-byte packets.
start_tcp_traffic()
{
	local pktsize=8000

	start_tcp_traffic_pktsize $pktsize "$@"
}
1428
# Kill the current background job (%%) and, if the kill succeeded, wait for
# it to go away.
stop_traffic()
{
	# Suppress noise from killing mausezahn.
	kill %% 2>/dev/null && wait %% 2>/dev/null
}
1434
# Per-interface state of running captures: tcpdump PID, packet capture file,
# and file collecting tcpdump's own output.
declare -A cappid
declare -A capfile
declare -A capout

# tcpdump_start IF_NAME [NETNS]
# Start a background tcpdump capturing incoming packets on IF_NAME into a
# fresh temporary file, optionally inside network namespace NETNS, then
# sleep one second. Sets the globals ns_cmd (namespace prefix; also read by
# tcpdump_stop) and capuser ("-Z $SUDO_USER" when SUDO_USER is set).
tcpdump_start()
{
	local if_name=$1; shift
	local ns=$1; shift

	capfile[$if_name]=$(mktemp)
	capout[$if_name]=$(mktemp)

	ns_cmd=""
	if [[ -n $ns ]]; then
		ns_cmd="ip netns exec ${ns}"
	fi

	capuser=""
	if [[ -n $SUDO_USER ]]; then
		capuser="-Z $SUDO_USER"
	fi

	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
		-s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
		> "${capout[$if_name]}" 2>&1 &
	cappid[$if_name]=$!

	sleep 1
}
1466
# Terminate the tcpdump recorded for an interface and reap it, then pause for
# a second.
#
# $1 - interface name used as key into cappid
#
# The kill command is prefixed with the global ns_cmd, i.e. whatever the most
# recent tcpdump_start left there.
tcpdump_stop()
{
	local if_name=$1
	local pid

	pid=${cappid[$if_name]}

	# Only wait for the process if the signal could be delivered.
	if $ns_cmd kill "$pid"; then
		wait "$pid"
	fi
	sleep 1
}
1475
# Remove the temporary files that tcpdump_start created for an interface.
#
# $1 - interface name used as key into capfile and capout
#
# The array entries themselves are left in place.
tcpdump_cleanup()
{
	local if_name=$1

	# Quote the paths: they come from mktemp and contain whitespace when
	# TMPDIR does, in which case an unquoted rm would miss the files.
	rm -- "${capfile[$if_name]}" "${capout[$if_name]}"
}
1482
# Print the capture recorded for an interface by reading the capture file
# back with tcpdump. tcpdump's stderr is merged into stdout.
#
# $1 - interface name used as key into capfile
tcpdump_show()
{
	local if_name=$1

	# The path is quoted so that a capture file located under a directory
	# with whitespace in its name is passed to -r as one argument.
	tcpdump -e -n -r "${capfile[$if_name]}" 2>&1
}
1489
# Send a single UDP packet from host1_if and check whether host2_if received it.
#
# $1 - destination MAC address
# $2 - source IP address
# $3 - destination IP address; its format selects IPv4 or IPv6 handling
# $4 - host1_if, the interface $MZ sends from
# $5 - host2_if, the interface on which reception is checked
#
# A temporary ingress qdisc and flower drop filter are installed on host2_if
# and removed again before returning.
#
# return 0 if the packet wasn't seen on host2_if or 1 if it was
mcast_packet_test()
{
	local mac=$1 src_ip=$2 ip=$3
	local host1_if=$4 host2_if=$5
	local seen=0
	# Start out assuming IPv6 and switch to IPv4 only when $ip has the
	# basic shape of a dotted-quad address.
	local tc_proto="ipv6"
	local mz_v6arg="-6"

	if [[ $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
		tc_proto="ip"
		mz_v6arg=""
	fi

	# The hit counter of this ACL on `host2_if` tells us whether the
	# packet arrived there.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
		flower ip_proto udp dst_mac $mac action drop

	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
	sleep 1

	# The packet counts as seen when the filter matched exactly once.
	if tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	then
		seen=1
	fi

	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $seen
}
1529
# Check the bridge MDB of br0 for the entries expected after a report.
#
# $1 - report name, only used in the failure message
# Remaining arguments - source addresses expected for $TEST_GROUP
#
# Two things are verified, each reported through check_err:
#   - an entry for $TEST_GROUP with a non-null source list exists and passes
#     the per-source address conditions built below;
#   - for every source, an entry with that .src for $TEST_GROUP exists.
brmcast_check_sg_entries()
{
	local report=$1; shift
	local slist=("$@")
	local sarg=""
	# Loop variables are local so that same-named variables of the caller
	# are not overwritten.
	local src sgent

	# Build one "and <address condition>" clause per expected source.
	for src in "${slist[@]}"; do
		sarg="${sarg} and .source_list[].address == \"$src\""
	done
	bridge -j -d -s mdb show dev br0 \
		| jq -e ".[].mdb[] | \
			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
	check_err $? "Wrong *,G entry source list after $report report"

	for sgent in "${slist[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
	done
}
1551
# Check whether traffic from each given source to $TEST_GROUP is forwarded.
#
# $1 - 1 when the traffic is expected to be forwarded; any other value means
#      it is expected to be blocked
# Remaining arguments - source addresses to probe
#
# Each source is probed with mcast_packet_test from $h2 towards $h1, which
# returns 1 when the packet was seen and 0 when it was not. The result is fed
# to check_fail (forwarding expected) or check_err (blocking expected).
brmcast_check_sg_fwding()
{
	local should_fwd=$1; shift
	local sources=("$@")
	# Kept local so that same-named variables of the caller survive.
	local src retval

	for src in "${sources[@]}"; do
		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
		retval=$?
		if [ $should_fwd -eq 1 ]; then
			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
		else
			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
		fi
	done
}
1569
# Check the blocked/non-blocked state of S,G entries in the MDB of br0.
#
# $1 - 1 when the sources are expected to be blocked, anything else otherwise
# Remaining arguments - source addresses to inspect for $TEST_GROUP
#
# For every source two jq queries are run and their exit status is passed to
# check_err_fail together with should_fail (0 when blocked is expected,
# 1 otherwise):
#   - the source appears in the group's source list with timer "0.00";
#   - the entry with that .src carries the "blocked" flag.
brmcast_check_sg_state()
{
	local is_blocked=$1; shift
	local sources=("$@")
	local should_fail=1
	# Kept local so that a same-named variable of the caller survives.
	local src

	if [ $is_blocked -eq 1 ]; then
		should_fail=0
	fi

	for src in "${sources[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
				 .source_list[] |
				 select(.address == \"$src\") |
				 select(.timer == \"0.00\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has zero timer"

		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
				 .flags[] == \"blocked\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has blocked flag"
	done
}
1596
# Join a multicast group on an interface by running mreceive in the background.
#
# $1 - interface name
# $2 - multicast group
#
# mreceive is executed through "ip vrf exec" using the name that
# master_name_get reports for the interface. Its PID is stored in the global
# mreceive_pid so that mc_leave can terminate it. A one second pause follows.
mc_join()
{
	local if_name=$1 group=$2
	local vrf_name=$(master_name_get $if_name)

	# Reception itself is irrelevant; the point is to join the IP
	# multicast group and get the L2 address added to the device's MAC
	# filtering table. All output is therefore thrown away.
	ip vrf exec $vrf_name mreceive -g $group -I $if_name &> /dev/null &
	mreceive_pid=$!

	sleep 1
}
1612
# Terminate the process recorded in the global mreceive_pid and reap it.
#
# The exit status is that of kill when it fails, otherwise that of wait.
mc_leave()
{
	# A bare "return" hands back the status of the failed kill.
	kill "$mreceive_pid" || return
	wait "$mreceive_pid"
}
1617
# Run msend once ("-c 1") for the given group(s) out of an interface.
#
# $1 - interface name
# $2 - value passed to msend as -g
#
# msend is executed through "ip vrf exec" using the name that master_name_get
# reports for the interface. All of its output is discarded; the exit status
# of the command is returned.
mc_send()
{
	local if_name=$1 groups=$2
	local vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name msend -g $groups -I $if_name -c 1 &> /dev/null
}
1627
# Start "<ip> monitor <type>" in the background, writing to a temporary file.
#
# $1 - monitor type handed to "monitor"
# $2 - optional command to use instead of "ip"
#
# Prints "<pid> <tmpfile>" on stdout; both values are also left in the
# variables mpid and tmpfile. The output is meant to be passed on to
# stop_ip_monitor.
start_ip_monitor()
{
	local mtype=$1; shift
	local ip=${1-ip}; shift

	tmpfile=$(mktemp /var/run/nexthoptestXXX)
	# The monitor is backgrounded inside a subshell that reports its PID;
	# anything the subshell writes to stderr is dropped.
	mpid=$( ($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null )
	sleep 0.2
	printf '%s %s\n' "$mpid" "$tmpfile"
}
1639
# Stop a monitor started by start_ip_monitor and verify how many events it
# recorded.
#
# $1 - PID of the monitor process
# $2 - file the monitor wrote to; removed before returning
# $3 - expected number of events
# $4 - description used as prefix of the failure message
#
# Only lines that begin with a word character are counted as events. The
# comparison result is reported through check_err.
stop_ip_monitor()
{
	local mpid=$1; shift
	local tmpfile=$1; shift
	local el=$1; shift
	local what=$1; shift

	sleep 0.2
	kill $mpid

	local lines=$(grep '^\w' $tmpfile | wc -l)

	[ $lines -eq $el ]
	check_err $? "$what: $lines lines of events, expected $el"
	rm -rf $tmpfile
}
1654
# Verify that exactly one "stats" monitor event is emitted for each of four
# steps: enabling <type>_stats, making the device suitable, making it
# unsuitable, and disabling <type>_stats.
#
# $1 - device
# $2 - stats type; "<type>_stats" is what gets switched on and off
# $3 - command run to trigger the "installation" step
# $4 - command run to trigger the "deinstallation" step
# $5 - optional command to use instead of "ip"
#
# Resets RET and reports the result through log_test.
hw_stats_monitor_test()
{
	local dev=$1; shift
	local type=$1; shift
	local make_suitable=$1; shift
	local make_unsuitable=$1; shift
	local ip=${1-ip}; shift
	# "<pid> <tmpfile>" as printed by start_ip_monitor; expanded unquoted
	# below so that it becomes two arguments of stop_ip_monitor.
	local ipmout

	RET=0

	# Enablement must be notified.
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats on
	stop_ip_monitor $ipmout 1 "${type}_stats enablement"

	# Offload must be notified.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_suitable
	stop_ip_monitor $ipmout 1 "${type}_stats installation"

	# Loss of offload must be notified.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_unsuitable
	stop_ip_monitor $ipmout 1 "${type}_stats deinstallation"

	# Disablement must be notified.
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats off
	stop_ip_monitor $ipmout 1 "${type}_stats disablement"

	log_test "${type}_stats notifications"
}
1687