1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4##############################################################################
5# Defines
6
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Tunables. A value already present in the environment is kept; otherwise the
# default below applies. All of them can be overridden by the configuration
# file sourced further down.
: "${PING:=ping}"
: "${PING6:=ping6}"
: "${MZ:=mausezahn}"
: "${ARPING:=arping}"
: "${TEAMD:=teamd}"
: "${WAIT_TIME:=5}"
: "${PAUSE_ON_FAIL:=no}"
: "${PAUSE_ON_CLEANUP:=no}"
: "${NETIF_TYPE:=veth}"
: "${NETIF_CREATE:=yes}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
: "${PING_COUNT:=10}"
: "${PING_TIMEOUT:=5}"
: "${WAIT_TIMEOUT:=20}"
: "${INTERFACE_TIMEOUT:=600}"
: "${LOW_AGEING_TIME:=1000}"
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
: "${STABLE_MAC_ADDRS:=no}"
: "${TCPDUMP_EXTRA_FLAGS:=}"

# Directory part of the path this file was sourced from; "." when the path
# has no directory component.
case "${BASH_SOURCE}" in
*/*)
	relative_path="${BASH_SOURCE%/*}"
	;;
*)
	relative_path="."
	;;
esac

# Optional per-setup configuration living next to this file.
if [[ -f "$relative_path/forwarding.config" ]]; then
	source "$relative_path/forwarding.config"
fi
42
43##############################################################################
44# Sanity checks
45
# Exit the script with the kselftest SKIP code unless "tc -j" succeeds, i.e.
# unless the installed tc accepts the JSON flag.
check_tc_version()
{
	if tc -j &> /dev/null; then
		return 0
	fi

	echo "SKIP: iproute2 too old; tc is missing JSON support"
	exit $ksft_skip
}
54
# Old versions of tc don't understand "mpls_uc".
# Probe by installing a throw-away matchall filter on DEV ($1); return the
# SKIP code when that fails, otherwise remove the probe filter again.
check_tc_mpls_support()
{
	local dev=$1; shift
	local -a probe=(dev $dev ingress protocol mpls_uc pref 1 handle 1 matchall)

	if ! tc filter add "${probe[@]}" action pipe &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing MPLS support"
		return $ksft_skip
	fi

	tc filter del "${probe[@]}"
}
69
# Old versions of tc produce invalid json output for mpls lse statistics.
# Install a flower filter matching on "mpls lse depth 2" on DEV ($1), feed the
# JSON dump of the filters through jq, and remove the filter again. Returns
# the SKIP code if either the filter cannot be added or jq rejects the dump.
check_tc_mpls_lse_stats()
{
	local dev=$1; shift
	local -a probe=(dev $dev ingress protocol mpls_uc pref 1 handle 1 flower)
	local json_rc

	if ! tc filter add "${probe[@]}" mpls lse depth 2 \
		action continue &> /dev/null; then
		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
		return $ksft_skip
	fi

	tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
	json_rc=$?

	# The probe filter is removed regardless of the jq verdict.
	tc filter del "${probe[@]}"

	if [[ $json_rc -ne 0 ]]; then
		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
		return $ksft_skip
	fi
}
95
# Exit with the SKIP code unless the "tc filter help" text mentions "block".
check_tc_shblock_support()
{
	if tc filter help 2>&1 | grep block &> /dev/null; then
		return 0
	fi

	echo "SKIP: iproute2 too old; tc is missing shared block support"
	exit $ksft_skip
}
104
# Exit with the SKIP code unless the "tc help" text mentions "chain".
check_tc_chain_support()
{
	if tc help 2>&1 | grep chain &> /dev/null; then
		return 0
	fi

	echo "SKIP: iproute2 too old; tc is missing chain support"
	exit $ksft_skip
}
113
# Exit with the SKIP code unless the "tc actions help" text mentions
# "hw_stats".
check_tc_action_hw_stats_support()
{
	if tc actions help 2>&1 | grep -q hw_stats; then
		return 0
	fi

	echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
	exit $ksft_skip
}
122
# Exit with the SKIP code unless the "ethtool --help" text mentions "lanes".
check_ethtool_lanes_support()
{
	if ethtool --help 2>&1 | grep lanes &> /dev/null; then
		return 0
	fi

	echo "SKIP: ethtool too old; it is missing lanes support"
	exit $ksft_skip
}
131
# Return the SKIP code unless "bridge -d link show" prints a " locked"
# attribute.
check_locked_port_support()
{
	bridge -d link show | grep -q " locked" && return 0

	echo "SKIP: iproute2 too old; Locked port feature not supported."
	return $ksft_skip
}
139
# Skip the whole run unless "id -u" reports user id 0.
[[ "$(id -u)" -eq 0 ]] || {
	echo "SKIP: need root privileges"
	exit $ksft_skip
}

# Importers opt into the tc JSON check by setting CHECK_TC=yes.
if [[ "$CHECK_TC" == "yes" ]]; then
	check_tc_version
fi
148
# Exit with the SKIP code unless CMD ($1) resolves, via "command -v", to
# something that passes the executable-file test.
require_command()
{
	local cmd=$1; shift
	local resolved

	resolved=$(command -v "$cmd")
	if [[ -x "$resolved" ]]; then
		return 0
	fi

	echo "SKIP: $cmd not installed"
	exit $ksft_skip
}
158
# Tools the importer declared as required; a missing one skips the run.
[[ "$REQUIRE_JQ" == "yes" ]] && require_command jq
[[ "$REQUIRE_MZ" == "yes" ]] && require_command $MZ
if [[ "$REQUIRE_MTOOLS" == "yes" ]]; then
	# https://github.com/vladimiroltean/mtools/
	# patched for IPv6 support
	require_command msend
	require_command mreceive
fi

# The importer has to declare NUM_NETIFS before sourcing this file.
[[ -v NUM_NETIFS ]] || {
	echo "SKIP: importer does not define \"NUM_NETIFS\""
	exit $ksft_skip
}
176
177##############################################################################
178# Command line options handling
179
count=0

# Positional arguments, when present, replace the whole NETIFS mapping: the
# first argument becomes NETIFS[p1], the second NETIFS[p2], and so on.
if [[ $# -gt 0 ]]; then
	unset NETIFS
	declare -A NETIFS
fi

until [[ $# -eq 0 ]]; do
	count=$((count + 1))
	NETIFS[p$count]="$1"
	shift
done
191
192##############################################################################
193# Network interfaces configuration
194
# Walk NETIFS in pairs (p1/p2, p3/p4, ...). When the first interface of a
# pair does not exist, create both ends as a veth pair; exit with status 1
# if that fails.
create_netif_veth()
{
	local first second

	for ((first = 1; first <= NUM_NETIFS; first += 2)); do
		second=$((first + 1))

		if ip link show dev ${NETIFS[p$first]} &> /dev/null; then
			continue
		fi

		if ! ip link add ${NETIFS[p$first]} type veth \
			peer name ${NETIFS[p$second]}; then
			echo "Failed to create netif"
			exit 1
		fi
	done
}
214
# Create the test interfaces according to NETIF_TYPE. Only "veth" is
# supported; any other type prints an error and exits with status 1.
create_netif()
{
	case "$NETIF_TYPE" in
	veth) create_netif_veth
	      ;;
	# Inside double quotes a backslash before a single quote is kept
	# literally, so the quotes must not be escaped here.
	*) echo "Can not create interfaces of type '$NETIF_TYPE'"
	   exit 1
	   ;;
	esac
}
225
# Original MAC address of each interface, keyed by interface name; filled in
# by mac_addr_prepare.
declare -A MAC_ADDR_ORIG

# Remember the current MAC address of every interface in NETIFS and replace
# it with 00:01:02:03:04:NN, where NN is the interface index in hex.
mac_addr_prepare()
{
	local new_addr=
	local dev=
	# Keep the loop counter local so a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		new_addr=$(printf "00:01:02:03:04:%02x" $i)

		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
		# Strip quotes
		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
		ip link set dev $dev address $new_addr
	done
}
242
# Put back the MAC address recorded in MAC_ADDR_ORIG on every interface in
# NETIFS.
mac_addr_restore()
{
	local dev=
	# Keep the loop counter local so a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
	done
}
252
# Optionally create the interfaces and pin their MAC addresses.
[[ "$NETIF_CREATE" == "yes" ]] && create_netif
[[ "$STABLE_MAC_ADDRS" == "yes" ]] && mac_addr_prepare

# Every interface listed in NETIFS has to exist by now; otherwise skip.
for ((i = 1; i <= NUM_NETIFS; ++i)); do
	if ! ip link show dev ${NETIFS[p$i]} &> /dev/null; then
		echo "SKIP: could not find all required interfaces"
		exit $ksft_skip
	fi
done
268
269##############################################################################
270# Helpers
271
# Exit status to return at the end. Set in case one of the tests fails.
EXIT_STATUS=0
# Per-test return value. Clear at the beginning of each test.
RET=0

# Record a failure for the current test: when ERR ($1) is non-zero and no
# failure has been recorded yet, store it in RET and keep MSG ($2) in retmsg.
check_err()
{
	local err=$1
	local msg=$2

	# Only the first failure of a test is kept.
	if [[ $RET -ne 0 || $err -eq 0 ]]; then
		return 0
	fi

	RET=$err
	retmsg=$msg
}
287
# Counterpart of check_err for commands expected to fail: when ERR ($1) is
# zero and no failure has been recorded yet, set RET to 1 and keep MSG ($2)
# in retmsg.
check_fail()
{
	local err=$1
	local msg=$2

	# Only the first failure of a test is kept.
	if [[ $RET -ne 0 || $err -ne 0 ]]; then
		return 0
	fi

	RET=1
	retmsg=$msg
}
298
# Dispatch to check_fail or check_err depending on SHOULD_FAIL ($1), using
# ERR ($2) as the status and WHAT ($3) to build the message.
check_err_fail()
{
	local should_fail=$1; shift
	local err=$1; shift
	local what=$1; shift

	if ((!should_fail)); then
		check_err $err "$what failed"
		return
	fi

	check_fail $err "$what succeeded, but should have failed"
}
311
# Print the verdict line for TEST_NAME ($1), based on RET. An optional second
# argument is appended in parentheses. On failure EXIT_STATUS is set to 1,
# retmsg is printed when non-empty, and with PAUSE_ON_FAIL=yes the user is
# prompted (answering "q" exits with status 1). Returns 1 on failure and 0
# otherwise.
log_test()
{
	local test_name=$1
	local opt_str=$2

	if [[ $# -eq 2 ]]; then
		opt_str="($opt_str)"
	fi

	if [[ $RET -eq 0 ]]; then
		printf "TEST: %-60s  [ OK ]\n" "$test_name $opt_str"
		return 0
	fi

	EXIT_STATUS=1
	printf "TEST: %-60s  [FAIL]\n" "$test_name $opt_str"
	if [[ -n "$retmsg" ]]; then
		printf "\t%s\n" "$retmsg"
	fi
	if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
		echo "Hit enter to continue, 'q' to quit"
		read a
		[ "$a" = "q" ] && exit 1
	fi
	return 1
}
338
# Print a [SKIP] verdict line for TEST_NAME ($1) with optional text ($2).
# Always returns 0.
log_test_skip()
{
	local test_name=$1
	local opt_str=$2
	local label="$test_name $opt_str"

	printf "TEST: %-60s  [SKIP]\n" "$label"
	return 0
}
347
# Print MSG ($1) prefixed with "INFO: ".
log_info()
{
	local msg=$1

	printf 'INFO: %s\n' "$msg"
}
354
# Re-run the command given after TIMEOUT ($1, in milliseconds) until it
# succeeds or more than TIMEOUT ms have passed since the start. The output
# of the last attempt is printed without a trailing newline. Returns 0 on
# success and 1 on timeout.
busywait()
{
	local timeout=$1; shift
	local start_time out now

	start_time=$(date -u +%s%3N)
	until out=$("$@"); do
		now=$(date -u +%s%3N)
		if ((now - start_time > timeout)); then
			echo -n "$out"
			return 1
		fi
	done

	echo -n "$out"
	return 0
}
377
# Run the given command and invert its result: return 0 when it fails and
# 1 when it succeeds.
not()
{
	if "$@"; then
		return 1
	fi

	return 0
}
383
# Print the largest of the integer arguments.
get_max()
{
	local arr=("$@")
	# Both helpers are local so that callers' variables named "max" or
	# "cur" are left untouched.
	local max=${arr[0]}
	local cur

	for cur in "${arr[@]}"; do
		if [[ $cur -gt $max ]]; then
			max=$cur
		fi
	done

	echo "$max"
}
397
# Run the trailing command and filter its output: keep lines matching ADDR
# ($1), then keep lines matching the optional keyword ("self" or "master").
# A "-v" right after the keyword inverts that second match.
grep_bridge_fdb()
{
	local addr=$1; shift
	local word
	local flag

	case "$1" in
	self|master)
		word=$1; shift
		if [[ "$1" == "-v" ]]; then
			flag=$1; shift
		fi
		;;
	esac

	# $flag is left unquoted on purpose so that it vanishes when empty.
	$@ | grep $addr | grep $flag "$word"
}
413
# Succeed when the output of the given command contains
# "Link detected: yes".
wait_for_port_up()
{
	local marker="Link detected: yes"

	"$@" | grep -q "$marker"
}
418
# Succeed when the output of the given command contains "offload".
wait_for_offload()
{
	local marker=offload

	"$@" | grep -q "$marker"
}
423
# Succeed when the output of the given command contains "trap".
wait_for_trap()
{
	local marker=trap

	"$@" | grep -q "$marker"
}
428
# Run the trailing command to read a counter, print its value, and return
# whether "<value> EXPR" holds, where EXPR ($1) is an arithmetic fragment
# such as ">= 10".
until_counter_is()
{
	local expr=$1; shift
	local current

	current=$("$@")

	echo $((current))
	((current $expr))
}
437
# Read the counter once with the trailing command, then busywait up to
# TIMEOUT ($1) until the same command reports at least that value plus
# DELTA ($2).
busywait_for_counter()
{
	local timeout=$1; shift
	local delta=$1; shift
	local base target

	base=$("$@")
	target=$((base + delta))

	busywait "$timeout" until_counter_is ">= $target" "$@"
}
446
# Wait for DEV ($1) to come up, allowing INTERFACE_TIMEOUT iterations, with
# WAIT_TIME ($2, default $WAIT_TIME) passed on as the settle time. If the
# interface does not come up, log a failed test and exit with status 1.
setup_wait_dev()
{
	local dev=$1; shift
	local wait_time=${1:-$WAIT_TIME}; shift

	if setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time; then
		return 0
	fi

	check_err 1
	log_test setup_wait_dev ": Interface $dev does not come up."
	exit 1
}
460
# Poll DEV ($1) once per second, at most MAX_ITERATIONS ($2, default
# $WAIT_TIMEOUT) times, until "ip link show ... up" reports "state UP". On
# success sleep WAIT_TIME ($3, default $WAIT_TIME) and return 0; return 1
# when the attempts run out.
setup_wait_dev_with_timeout()
{
	local dev=$1; shift
	local max_iterations=${1:-$WAIT_TIMEOUT}; shift
	local wait_time=${1:-$WAIT_TIME}; shift
	local attempt

	for ((attempt = 1; attempt <= max_iterations; ++attempt)); do
		if ip link show dev $dev up | grep 'state UP' &> /dev/null; then
			sleep $wait_time
			return 0
		fi

		sleep 1
	done

	return 1
}
481
# Wait for the first NUM ($1, default $NUM_NETIFS) interfaces of NETIFS to
# come up, then sleep WAIT_TIME once.
setup_wait()
{
	local num_netifs=${1:-$NUM_NETIFS}
	local i=1

	while ((i <= num_netifs)); do
		setup_wait_dev ${NETIFS[p$i]} 0
		i=$((i + 1))
	done

	# Make sure links are ready.
	sleep $WAIT_TIME
}
494
# Run CMD ($1, a command line that is word-split on purpose) and filter its
# output through jq.
#   $1 - command line producing JSON
#   $2 - jq expression
#   $3 - optional extra jq options (word-split on purpose)
# Prints the jq result on a single line. Returns the failing status when CMD
# or jq fails, and non-zero when the jq result is empty.
cmd_jq()
{
	local cmd=$1
	local jq_exp=$2
	local jq_opts=$3
	local ret
	local output

	output="$($cmd)"
	# if the command fails, return error right away
	ret=$?
	if [[ $ret -ne 0 ]]; then
		return $ret
	fi
	# Hand the JSON to jq verbatim: an unquoted echo would collapse
	# whitespace inside string values and expose the text to globbing.
	output=$(printf '%s\n' "$output" | jq -r $jq_opts "$jq_exp")
	ret=$?
	if [[ $ret -ne 0 ]]; then
		return $ret
	fi
	# Left unquoted on purpose: a multi-line jq result is joined into one
	# space-separated line, as callers have always received it.
	echo $output
	# return success only in case of non-empty output
	[ ! -z "$output" ]
}
518
# Block until lldptool no longer reports "pending" or "unknown" APP entries
# for DEV ($1), polling every 5 seconds.
lldpad_app_wait_set()
{
	local dev=$1; shift

	while true; do
		if ! lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; then
			break
		fi

		echo "$dev: waiting for lldpad to push pending APP updates"
		sleep 5
	done
}
528
# Fixed 5 second pause after deleting APP entries.
lldpad_app_wait_del()
{
	local settle_time=5

	# Give lldpad a chance to push down the changes. If the device is downed
	# too soon, the updates will be left pending. However, they will have
	# been struck off the lldpad's DB already, so we won't be able to tell
	# they are pending. Then on next test iteration this would cause
	# weirdness as newly-added APP rules conflict with the old ones,
	# sometimes getting stuck in an "unknown" state.
	sleep $settle_time
}
539
# Hook to run before a test's cleanup: optionally wait for the user
# (PAUSE_ON_CLEANUP=yes) and restore the original MAC addresses
# (STABLE_MAC_ADDRS=yes).
pre_cleanup()
{
	if [[ "$PAUSE_ON_CLEANUP" == "yes" ]]; then
		echo "Pausing before cleanup, hit any key to continue"
		read
	fi

	if [[ "$STABLE_MAC_ADDRS" == "yes" ]]; then
		mac_addr_restore
	fi
}
551
# For IPv4 and then IPv6: add a rule for the local table at preference 32765
# and delete the rule at preference 0.
vrf_prepare()
{
	local family

	for family in -4 -6; do
		ip $family rule add pref 32765 table local
		ip $family rule del pref 0
	done
}
559
# Undo vrf_prepare, IPv6 first: add the local-table rule back at preference 0
# and delete the one at preference 32765.
vrf_cleanup()
{
	local family

	for family in -6 -4; do
		ip $family rule add pref 0 table local
		ip $family rule del pref 32765
	done
}
567
# Last routing table id handed out, and the id assigned to each VRF name.
__last_tb_id=0
declare -A __TB_IDS

# Allocate the next table id for VRF_NAME ($1), remember it in __TB_IDS and
# hand it back as the function's return status.
__vrf_td_id_assign()
{
	local vrf_name=$1
	local next_id=$((__last_tb_id + 1))

	__last_tb_id=$next_id
	__TB_IDS[$vrf_name]=$next_id
	return $next_id
}
579
# Hand back, as the return status, the table id recorded in __TB_IDS for
# VRF_NAME ($1).
__vrf_td_id_lookup()
{
	local vrf_name=$1
	local tb_id=${__TB_IDS[$vrf_name]}

	return $tb_id
}
586
# Create VRF device VRF_NAME ($1) bound to a freshly assigned table, and add
# an unreachable default route (metric 4278198272) to that table for IPv4
# and IPv6.
vrf_create()
{
	local vrf_name=$1
	local tb_id
	local family

	# The table id comes back as the helper's return status.
	__vrf_td_id_assign $vrf_name
	tb_id=$?

	ip link add dev $vrf_name type vrf table $tb_id
	for family in -4 -6; do
		ip $family route add table $tb_id unreachable default metric 4278198272
	done
}
599
# Remove the unreachable default routes (IPv6 first) from the table of
# VRF_NAME ($1) and delete the VRF device.
vrf_destroy()
{
	local vrf_name=$1
	local tb_id
	local family

	# The table id comes back as the helper's return status.
	__vrf_td_id_lookup $vrf_name
	tb_id=$?

	for family in -6 -4; do
		ip $family route del table $tb_id unreachable default metric 4278198272
	done
	ip link del dev $vrf_name
}
612
# Run "ip address ADD_DEL <addr> dev IF_NAME" for every address given.
#   $1 - interface name
#   $2 - "add" or "del"
#   $3... - addresses (each one is word-split on purpose, so it may carry
#           extra ip-address arguments)
__addr_add_del()
{
	local if_name=$1
	local add_del=$2
	local array
	# Local, so a caller's variable of the same name is not overwritten.
	local addrstr

	shift
	shift
	array=("${@}")

	for addrstr in "${array[@]}"; do
		ip address $add_del $addrstr dev $if_name
	done
}
627
# Enslave IF_NAME ($1) to VRF_NAME ($2), bring it up and add the remaining
# arguments as addresses.
__simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=$1; shift

	ip link set dev $if_name master $vrf_name
	ip link set dev $if_name up

	__addr_add_del $if_name add "$@"
}
639
# Remove the given addresses from IF_NAME ($1), take it down and release it
# from its master.
__simple_if_fini()
{
	local if_name=$1; shift

	__addr_add_del $if_name del "$@"

	ip link set dev $if_name down
	ip link set dev $if_name nomaster
}
650
# Create a VRF named "v<IF_NAME>", bring it up, and initialise IF_NAME ($1)
# inside it with the remaining arguments as addresses.
simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	vrf_create $vrf_name
	ip link set dev $vrf_name up
	__simple_if_init $if_name $vrf_name "$@"
}
665
# Reverse simple_if_init: tear down IF_NAME ($1) with the given addresses and
# destroy the VRF named "v<IF_NAME>".
simple_if_fini()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	__simple_if_fini $if_name "$@"
	vrf_destroy $vrf_name
}
679
# Create tunnel device NAME ($1) of TYPE ($2) between LOCAL ($3) and REMOTE
# ($4), passing any further arguments to "ip link add", then bring it up.
tunnel_create()
{
	local name=$1; shift
	local type=$1; shift
	local local_addr=$1; shift
	local remote_addr=$1; shift

	ip link add name $name type $type \
	   local $local_addr remote $remote_addr "$@"
	ip link set dev $name up
}
691
# Delete tunnel device NAME ($1).
tunnel_destroy()
{
	local tunnel=$1; shift

	ip link del dev $tunnel
}
698
# Create VLAN device "<IF_NAME>.<VID>" on top of IF_NAME ($1) with id VID
# ($2), enslave it to VRF ($3) when that is non-empty, bring it up and add
# the remaining arguments as addresses.
vlan_create()
{
	local if_name=$1; shift
	local vid=$1; shift
	local vrf=$1; shift
	local name=$if_name.$vid

	ip link add name $name link $if_name type vlan id $vid
	if [[ -n "$vrf" ]]; then
		ip link set dev $name master $vrf
	fi
	ip link set dev $name up
	__addr_add_del $name add "$@"
}
714
# Delete VLAN device "<IF_NAME>.<VID>" ($1, $2).
vlan_destroy()
{
	local if_name=$1; shift
	local vid=$1; shift

	ip link del dev $if_name.$vid
}
723
# Create team device IF_NAME ($1) with runner MODE ($2) through $TEAMD and
# enslave the remaining arguments to it. Each port is taken down, enslaved
# and brought up again; finally the team device itself is brought up.
# Skips the run when $TEAMD is not installed.
team_create()
{
	local if_name=$1; shift
	local mode=$1; shift
	# Local, so a caller's variable of the same name is not overwritten.
	local slave

	require_command $TEAMD
	# $mode is quoted so the JSON config always stays a single argument.
	$TEAMD -t $if_name -d -c '{"runner": {"name": "'"$mode"'"}}'
	for slave in "$@"; do
		ip link set dev $slave down
		ip link set dev $slave master $if_name
		ip link set dev $slave up
	done
	ip link set dev $if_name up
}
738
# Ask $TEAMD to kill the daemon instance of team device IF_NAME ($1).
team_destroy()
{
	local team=$1; shift

	$TEAMD -t $team -k
}
745
# Print the "master" field of IF_NAME ($1) from "ip -j link show".
master_name_get()
{
	local if_name=$1
	local filter='.[]["master"]'

	ip -j link show dev $if_name | jq -r "$filter"
}
752
# Print the stats64 counter STAT ($3) in direction DIR ($2, e.g. rx or tx)
# of IF_NAME ($1).
link_stats_get()
{
	local if_name=$1; shift
	local dir=$1; shift
	local stat=$1; shift

	ip -j -s link show dev $if_name |
		jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
}
762
# Print the tx packets counter of interface $1.
link_stats_tx_packets_get()
{
	local -a counter=(tx packets)

	link_stats_get $1 "${counter[@]}"
}
767
# Print the rx errors counter of interface $1.
link_stats_rx_errors_get()
{
	local -a counter=(rx errors)

	link_stats_get $1 "${counter[@]}"
}
772
# Print a statistic of the actions of the filter with preference PREF ($2)
# on DEV ($1). DIR ($3) defaults to ingress and SELECTOR ($4) to ".packets".
tc_rule_stats_get()
{
	local dev=$1; shift
	local pref=$1; shift
	local dir=$1; shift
	local selector=${1:-.packets}; shift
	local filter=".[1].options.actions[].stats$selector"

	tc -j -s filter show dev $dev ${dir:-ingress} pref $pref |
		jq "$filter"
}
783
# Print a statistic of the first action of the filter with handle HANDLE
# ($2) among the filters selected by ID ($1, word-split on purpose, e.g.
# "dev swp1 ingress"). SELECTOR ($3) defaults to ".packets".
tc_rule_handle_stats_get()
{
	local id=$1; shift
	local handle=$1; shift
	local selector=${1:-.packets}; shift
	local by_handle=".[] | select(.options.handle == $handle)"
	local stat=".options.actions[0].stats$selector"

	tc -j -s filter show $id |
		jq "$by_handle | $stat"
}
794
# Print the value part (everything after the first colon) of the first
# "ethtool -S DEV" line whose name is STAT ($2).
ethtool_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift
	local pattern="^ *$stat:"

	ethtool -S $dev |
		grep "$pattern" |
		head -n 1 |
		cut -d: -f2
}
802
# Apply jq SELECTOR ($3) to the qdisc of DEV ($1) whose handle is HANDLE
# ($2).
qdisc_stats_get()
{
	local dev=$1; shift
	local handle=$1; shift
	local selector=$1; shift
	local filter=".[] | select(.handle == \"$handle\") | $selector"

	tc -j -s qdisc show dev "$dev" |
		jq "$filter"
}
812
# Apply jq SELECTOR ($3) to the qdisc of DEV ($1) whose parent is PARENT
# ($2); the listing is requested with "invisible".
qdisc_parent_stats_get()
{
	local dev=$1; shift
	local parent=$1; shift
	local selector=$1; shift
	local filter=".[] | select(.parent == \"$parent\") | $selector"

	tc -j -s qdisc show dev "$dev" invisible |
		jq "$filter"
}
822
# Print the second tab-separated field of the lines in
# /proc/net/dev_snmp6/DEV that start with STAT ($2).
ipv6_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift
	local snmp_file=/proc/net/dev_snmp6/$dev

	cat $snmp_file | grep "^$stat" | cut -f2
}
830
# Print counter STAT ($4) in direction DIR ($3) from the offload stats
# subgroup SUITE ($1) of IF_NAME ($2).
hw_stats_get()
{
	local suite=$1; shift
	local if_name=$1; shift
	local dir=$1; shift
	local stat=$1; shift
	local filter=".[0].stats64.$dir.$stat"

	ip -j stats show dev $if_name group offload subgroup $suite |
		jq "$filter"
}
841
# Print SPEED ($1, in bits per second) scaled by powers of 1024 with a unit
# suffix (bps, Kbps, Mbps or Gbps). Values are divided with one decimal
# place via bc. Gbps is the largest unit: bigger values stay in Gbps.
humanize()
{
	local speed=$1; shift
	local units=(bps Kbps Mbps Gbps)
	local last=$((${#units[@]} - 1))
	local idx

	# Stop scaling at the last unit. Dividing once more there would print
	# a value 1024 times too small under the Gbps label.
	for ((idx = 0; idx < last; ++idx)); do
		if (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${units[idx]}"
}
856
# Print 8 * (T1 - T0) / INTERVAL using integer arithmetic, i.e. a bit rate
# when T0 ($1) and T1 ($2) are byte counters taken INTERVAL ($3) apart.
rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local bits=$((8 * (t1 - t0)))

	echo $((bits / interval))
}
865
# Print (T1 - T0) / INTERVAL using integer arithmetic.
packets_rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local delta=$((t1 - t0))

	echo $((delta / interval))
}
874
# Print the "address" field of IF_NAME ($1) from "ip -j link show".
mac_get()
{
	local if_name=$1
	local filter='.[]["address"]'

	ip -j link show dev $if_name | jq -r "$filter"
}
881
# Print the first address of IF_NAME ($1) whose scope is "link".
ipv6_lladdr_get()
{
	local if_name=$1
	local filter='.[]["addr_info"][] | select(.scope == "link").local'

	ip -j addr show dev $if_name |
		jq -r "$filter" |
		head -n 1
}
890
# Print the ageing time of BRIDGE ($1) in seconds.
bridge_ageing_time_get()
{
	local bridge=$1
	local filter='.[]["linkinfo"]["info_data"]["ageing_time"]'
	local reported

	reported=$(ip -j -d link show dev $bridge | jq "$filter")

	# Need to divide by 100 to convert to seconds.
	echo $((reported / 100))
}
901
# Value each sysctl key had before sysctl_set changed it.
declare -A SYSCTL_ORIG

# Remember the current value of KEY ($1) in SYSCTL_ORIG, then set it to
# VALUE ($2).
sysctl_set()
{
	local key=$1; shift
	local value=$1; shift
	local previous

	previous=$(sysctl -n $key)
	SYSCTL_ORIG[$key]=$previous
	sysctl -qw $key=$value
}
911
# Write back the value recorded in SYSCTL_ORIG for KEY ($1).
sysctl_restore()
{
	local key=$1; shift
	local saved=${SYSCTL_ORIG["$key"]}

	sysctl -qw $key=$saved
}
918
# Turn on forwarding for all interfaces, IPv4 first and then IPv6, through
# sysctl_set.
forwarding_enable()
{
	local family

	for family in ipv4 ipv6; do
		sysctl_set net.$family.conf.all.forwarding 1
	done
}
924
# Restore the forwarding sysctls through sysctl_restore, IPv6 first and then
# IPv4.
forwarding_restore()
{
	local family

	for family in ipv6 ipv4; do
		sysctl_restore net.$family.conf.all.forwarding
	done
}
930
# MTU each device had before mtu_set changed it.
declare -A MTU_ORIG

# Remember the current MTU of DEV ($1) in MTU_ORIG, then set it to MTU ($2).
mtu_set()
{
	local dev=$1; shift
	local mtu=$1; shift
	local previous

	previous=$(ip -j link show dev $dev | jq -e '.[].mtu')
	MTU_ORIG["$dev"]=$previous
	ip link set dev $dev mtu $mtu
}
940
# Set DEV ($1) back to the MTU recorded in MTU_ORIG.
mtu_restore()
{
	local dev=$1; shift
	local saved=${MTU_ORIG["$dev"]}

	ip link set dev $dev mtu $saved
}
947
# Return 0 when "ethtool -k" reports "hw-tc-offload: on" for each of the
# first NUM ($1, default $NUM_NETIFS) interfaces in NETIFS, and 1 otherwise.
tc_offload_check()
{
	local num_netifs=${1:-$NUM_NETIFS}
	# Keep the loop counter local so a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= num_netifs; ++i)); do
		ethtool -k ${NETIFS[p$i]} \
			| grep "hw-tc-offload: on" &> /dev/null
		if [[ $? -ne 0 ]]; then
			return 1
		fi
	done

	return 0
}
962
# Install a preference-1 flower filter on DEV ($1) in DIRECTION ($2): a
# skip_sw trap when that can be installed, otherwise a plain "continue".
trap_install()
{
	local dev=$1; shift
	local direction=$1; shift

	# Some devices may not support or need in-hardware trapping of traffic
	# (e.g. the veth pairs that this library creates for non-existent
	# loopbacks). Use continue instead, so that there is a filter in there
	# (some tests check counters), and so that other filters are still
	# processed.
	if ! tc filter add dev $dev $direction pref 1 \
		flower skip_sw action trap 2>/dev/null; then
		tc filter add dev $dev $direction pref 1 \
			flower action continue
	fi
}
978
# Remove the preference-1 flower filter from DEV ($1) in DIRECTION ($2).
trap_uninstall()
{
	local dev=$1; shift
	local direction=$1; shift
	local -a filter=(dev $dev $direction pref 1 flower)

	tc filter del "${filter[@]}"
}
986
# Call trap_install with the given arguments, but only when $tcflags
# contains "skip_hw".
slow_path_trap_install()
{
	# For slow-path testing, we need to install a trap to get to
	# slow path the packets that would otherwise be switched in HW.
	if [[ "$tcflags" == *skip_hw* ]]; then
		trap_install "$@"
	fi
}
995
# Call trap_uninstall with the given arguments, but only when $tcflags
# contains "skip_hw".
slow_path_trap_uninstall()
{
	if [[ "$tcflags" == *skip_hw* ]]; then
		trap_uninstall "$@"
	fi
}
1002
# Add or delete an ingress flower filter matching ICMP on a device.
#   $1 - "add" or "del"
#   $2 - filter preference
#   $3 - protocol suffix: "" for IPv4/ICMP, "v6" for IPv6/ICMPv6
#   $4 - device
#   $5 - extra flower match arguments (word-split on purpose)
__icmp_capture_add_del()
{
	local add_del=$1; shift
	local pref=$1; shift
	local vsuf=$1; shift
	local tundev=$1; shift
	local filter=$1; shift
	local l3_proto=ip$vsuf
	local l4_proto=icmp$vsuf

	tc filter $add_del dev "$tundev" ingress \
	   proto $l3_proto pref $pref \
	   flower ip_proto $l4_proto $filter \
	   action pass
}
1016
# Add the IPv4 ICMP capture filter at preference 100; arguments are the
# device and optional extra flower matches.
icmp_capture_install()
{
	local pref=100

	__icmp_capture_add_del add $pref "" "$@"
}
1021
# Delete the IPv4 ICMP capture filter at preference 100; arguments are the
# device and optional extra flower matches.
icmp_capture_uninstall()
{
	local pref=100

	__icmp_capture_add_del del $pref "" "$@"
}
1026
# Add the ICMPv6 capture filter at preference 100; arguments are the device
# and optional extra flower matches.
icmp6_capture_install()
{
	local pref=100

	__icmp_capture_add_del add $pref v6 "$@"
}
1031
# Delete the ICMPv6 capture filter at preference 100; arguments are the
# device and optional extra flower matches.
icmp6_capture_uninstall()
{
	local pref=100

	__icmp_capture_add_del del $pref v6 "$@"
}
1036
# Add or delete an ingress 802.1q flower filter on a device.
#   $1 - "add" or "del"
#   $2 - filter preference
#   $3 - device
#   $4 - flower match arguments (word-split on purpose)
__vlan_capture_add_del()
{
	local add_del=$1; shift
	local pref=$1; shift
	local dev=$1; shift
	local filter=$1; shift
	local l2_proto=802.1q

	tc filter $add_del dev "$dev" ingress \
	   proto $l2_proto pref $pref \
	   flower $filter \
	   action pass
}
1049
# Add the VLAN capture filter at preference 100; arguments are the device
# and the flower matches.
vlan_capture_install()
{
	local pref=100

	__vlan_capture_add_del add $pref "$@"
}
1054
# Delete the VLAN capture filter at preference 100; arguments are the device
# and the flower matches.
vlan_capture_uninstall()
{
	local pref=100

	__vlan_capture_add_del del $pref "$@"
}
1059
# Add or delete eight software-only ICMP capture filters on DEV ($2), one
# per DSCP value BASE..BASE+7 ($3). The filter for a given DSCP uses
# preference DSCP + 100 and matches ip_tos DSCP << 2.
#   $1 - "add" or "del"
__dscp_capture_add_del()
{
	local add_del=$1; shift
	local dev=$1; shift
	local base=$1; shift
	local dscp;
	# Local, so a caller's variable of the same name is not overwritten.
	local prio

	for prio in {0..7}; do
		dscp=$((base + prio))
		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
				       "skip_hw ip_tos $((dscp << 2))"
	done
}
1073
# Install the DSCP capture filters on DEV ($1) for DSCP values starting at
# BASE ($2).
dscp_capture_install()
{
	local dev=$1; shift
	local base=$1; shift
	local action=add

	__dscp_capture_add_del $action $dev $base
}
1081
# Remove the DSCP capture filters from DEV ($1) for DSCP values starting at
# BASE ($2).
dscp_capture_uninstall()
{
	local dev=$1; shift
	local base=$1; shift
	local action=del

	__dscp_capture_add_del $action $dev $base
}
1089
# Print one "[DSCP]=COUNT " line for each DSCP value BASE..BASE+7 ($2),
# where COUNT is what tc_rule_stats_get reports for the filter with
# preference DSCP + 100 on DEV ($1).
dscp_fetch_stats()
{
	local dev=$1; shift
	local base=$1; shift
	# Local, so a caller's variable of the same name is not overwritten.
	local prio

	for prio in {0..7}; do
		local dscp=$((base + prio))
		local t=$(tc_rule_stats_get $dev $((dscp + 100)))
		echo "[$dscp]=$t "
	done
}
1101
# Add a clsact qdisc to DEV ($1) and an ingress matchall filter at preference
# 10000 that drops everything reaching it.
matchall_sink_create()
{
	local dev=$1; shift
	local pref=10000

	tc qdisc add dev $dev clsact
	tc filter add dev $dev ingress pref $pref matchall action drop
}
1112
# Run every test function named in $TESTS, or in $ALL_TESTS when TESTS is
# unset or empty, in the listed order.
tests_run()
{
	local current_test
	local selected=${TESTS:-$ALL_TESTS}

	for current_test in $selected; do
		$current_test
	done
}
1121
# Compare the measured packet split between two nexthops with the configured
# weights and log the verdict under the test name DESC.
#   $1 - test description
#   $2, $3 - weights of the two nexthops
#   $4, $5 - packets seen on the two nexthops
# The larger weight is divided by the smaller one, and the packet counts are
# divided in the same order. The test fails when either packet count is 0,
# or when the two ratios differ by more than 15% of the weights ratio.
multipath_eval()
{
	local desc="$1"
	local weight_rp12=$2
	local weight_rp13=$3
	local packets_rp12=$4
	local packets_rp13=$5
	local weights_ratio packets_ratio diff

	RET=0

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
				| bc -l)
	else
		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
				| bc -l)
	fi

	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
	       check_err 1 "Packet difference is 0"
	       # Log under the caller's description, like the regular path
	       # below, so the failure can be attributed to the right test.
	       log_test "$desc"
	       log_info "Expected ratio $weights_ratio"
	       return
	fi

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
				| bc -l)
	else
		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
				| bc -l)
	fi

	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
	# Absolute value: drop a leading minus sign.
	diff=${diff#-}

	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
	check_err $? "Too large discrepancy between expected and measured ratios"
	log_test "$desc"
	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
}
1164
# Run a command inside network namespace NAME ($1).
# A bash process is started in the namespace and fed a short script that
# sets NUM_NETIFS=0, sources lib.sh (resolved relative to the current
# directory) and then runs the remaining arguments. Each argument is
# re-quoted with printf %q and followed by the first character of IFS, so it
# reaches the inner shell as a single word.
in_ns()
{
	local name=$1; shift

	ip netns exec $name bash <<-EOF
		NUM_NETIFS=0
		source lib.sh
		$(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done)
	EOF
}
1175
1176##############################################################################
1177# Tests
1178
# Ping DIP ($2) with $PING from within the VRF that IF_NAME ($1) is enslaved
# to, sending PING_COUNT packets 0.1 s apart with a deadline of PING_TIMEOUT.
# ARGS ($3) holds extra ping options and is word-split on purpose. Output is
# discarded; the return status is that of the ping invocation.
ping_do()
{
	local if_name=$1
	local dip=$2
	local args=$3
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name $PING $args $dip \
		-c $PING_COUNT -i 0.1 -w $PING_TIMEOUT &> /dev/null
}
1191
# IPv4 ping test from interface $1 to address $2. The result is logged as
# "ping" followed by the optional suffix $3.
ping_test()
{
	local suffix=$3

	RET=0

	ping_do $1 $2
	check_err $?
	log_test "ping$suffix"
}
1200
# Ping DIP ($2) with $PING6 from within the VRF that IF_NAME ($1) is enslaved
# to, sending PING_COUNT packets 0.1 s apart with a deadline of PING_TIMEOUT.
# ARGS ($3) holds extra ping options and is word-split on purpose. Output is
# discarded; the return status is that of the ping invocation.
ping6_do()
{
	local if_name=$1
	local dip=$2
	local args=$3
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name $PING6 $args $dip \
		-c $PING_COUNT -i 0.1 -w $PING_TIMEOUT &> /dev/null
}
1213
# IPv6 ping test from interface $1 to address $2. The result is logged as
# "ping6" followed by the optional suffix $3.
ping6_test()
{
	local suffix=$3

	RET=0

	ping6_do $1 $2
	check_err $?
	log_test "ping6$suffix"
}
1222
# FDB learning test for a bridge port.
#   $1 - bridge device
#   $2 - bridge port under test (connected to `host1_if`)
#   $3 - host interface behind that port
#   $4 - a second host interface used to send towards the test MAC
# The steps, in order: no FDB entry for the test MAC exists; with flooding
# off a packet for the unknown MAC does not reach host1_if; sending from the
# MAC on host1_if creates an FDB entry; traffic to the MAC is then forwarded;
# the entry ages out; and no entry is created while learning is off.
# The port's learning and flood settings are switched back on at the end
# and the result is logged as "FDB learning".
learning_test()
{
	local bridge=$1
	local br_port1=$2	# Connected to `host1_if`.
	local host1_if=$3
	local host2_if=$4
	local mac=de:ad:be:ef:13:37
	local ageing_time

	RET=0

	# Precondition: the test MAC must not be known on the port yet.
	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Disable unknown unicast flooding on `br_port1` to make sure
	# packets are only forwarded through the port after a matching
	# FDB entry was installed.
	bridge link set dev $br_port1 flood off

	# Count packets destined to the test MAC that arrive at host1_if.
	tc qdisc add dev $host1_if ingress
	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	# Unknown destination and flooding off: nothing may arrive.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	check_fail $? "Packet reached second host when should not"

	# Send one packet with the test MAC as source so the bridge can
	# learn it on br_port1.
	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_err $? "Did not find FDB record when should"

	# With the entry in place, the packet has to be forwarded.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	check_err $? "Packet did not reach second host when should"

	# Wait for 10 seconds after the ageing time to make sure FDB
	# record was aged-out.
	ageing_time=$(bridge_ageing_time_get $bridge)
	sleep $((ageing_time + 10))

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# With learning disabled, the same source MAC must not be recorded.
	bridge link set dev $br_port1 learning off

	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Switch the port settings back on and remove the counting filter.
	bridge link set dev $br_port1 learning on

	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host1_if ingress

	bridge link set dev $br_port1 flood on

	log_test "FDB learning"
}
1297
# Send one packet for MAC/IP ($2/$3) out of HOST1_IF ($4) and check whether
# it arrived at HOST2_IF ($5). SHOULD_FLOOD ($1) is "true" or "false".
# Returns 0 when the observation matches the expectation and 1 otherwise.
flood_test_do()
{
	local should_flood=$1
	local mac=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local err=0
	local seen_rc

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was flooded to it or not.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
	sleep 1

	# jq -e succeeds only when filter 101 counted exactly one packet.
	tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	seen_rc=$?

	case "$should_flood" in
	true)
		if [[ $seen_rc -ne 0 ]]; then
			err=1
		fi
		;;
	false)
		if [[ $seen_rc -eq 0 ]]; then
			err=1
		fi
		;;
	esac

	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $err
}
1329
# Unknown unicast flood test: with "flood off" on BR_PORT ($1) a packet sent
# from HOST1_IF ($2) must not reach HOST2_IF ($3); with "flood on" it must.
flood_unicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local dmac=de:ad:be:ef:13:37
	local dip=192.0.2.100

	RET=0

	bridge link set dev $br_port flood off
	flood_test_do false $dmac $dip $host1_if $host2_if
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port flood on
	flood_test_do true $dmac $dip $host1_if $host2_if
	check_err $? "Packet was not flooded when should"

	log_test "Unknown unicast flood"
}
1352
# Unregistered multicast flood test: with "mcast_flood off" on BR_PORT ($1) a
# packet sent from HOST1_IF ($2) must not reach HOST2_IF ($3); with
# "mcast_flood on" it must.
flood_multicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local dmac=01:00:5e:00:00:01
	local dip=239.0.0.1

	RET=0

	bridge link set dev $br_port mcast_flood off
	flood_test_do false $dmac $dip $host1_if $host2_if
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port mcast_flood on
	flood_test_do true $dmac $dip $host1_if $host2_if
	check_err $? "Packet was not flooded when should"

	log_test "Unregistered multicast flood"
}
1375
# Run the unicast and then the multicast flood test with the same arguments.
flood_test()
{
	# `br_port` is connected to `host2_if`
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local subtest

	for subtest in flood_unicast_test flood_multicast_test; do
		$subtest $br_port $host1_if $host2_if
	done
}
1386
# Start $MZ in the background sending an endless stream ("-c 0") of PROTO
# packets, then sleep one second.
#   $1 - packet size
#   $2 - protocol passed to "-t" (e.g. udp, tcp)
#   $3 - interface where the traffic egresses the host
#   $4 - source IP
#   $5 - destination IP
#   $6 - destination MAC
#   $7... - extra arguments appended to the $MZ command line
__start_traffic()
{
	local pktsize=$1; shift
	local proto=$1; shift
	local h_in=$1; shift
	local sip=$1; shift
	local dip=$1; shift
	local dmac=$1; shift

	$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 -a own -b $dmac \
		-t "$proto" -q "$@" &

	sleep 1
}
1400
# Start background UDP traffic with packet size $1; the remaining arguments
# are passed on to __start_traffic.
start_traffic_pktsize()
{
	local pktsize=$1; shift
	local proto=udp

	__start_traffic $pktsize $proto "$@"
}
1407
# Start background TCP traffic with packet size $1; the remaining arguments
# are passed on to __start_traffic.
start_tcp_traffic_pktsize()
{
	local pktsize=$1; shift
	local proto=tcp

	__start_traffic $pktsize $proto "$@"
}
1414
# Start background UDP traffic with the default packet size of 8000.
start_traffic()
{
	local default_pktsize=8000

	start_traffic_pktsize $default_pktsize "$@"
}
1419
# Start background TCP traffic with the default packet size of 8000.
start_tcp_traffic()
{
	local default_pktsize=8000

	start_tcp_traffic_pktsize $default_pktsize "$@"
}
1424
# Kill the shell's current background job ("%%") and wait for it to exit.
# NOTE(review): presumably the mausezahn instance most recently started by
# __start_traffic - confirm no other background job is started in between.
stop_traffic()
{
	# Suppress noise from killing mausezahn.
	{ kill %% && wait %%; } 2>/dev/null
}
1430
# Per-interface capture state: tcpdump PID, capture file and the file that
# collects tcpdump's own output.
declare -A cappid
declare -A capfile
declare -A capout

# Start a background tcpdump capturing incoming packets on IF_NAME ($1),
# optionally inside network namespace NS ($2), then sleep one second.
# The capture is written to a fresh temporary file recorded in capfile,
# tcpdump's stdout/stderr go to a second temporary file recorded in capout,
# and the PID is stored in cappid. When SUDO_USER is set, it is passed to
# tcpdump with -Z.
tcpdump_start()
{
	local if_name=$1; shift
	local ns=$1; shift
	# Local, so a caller's variable of the same name is not overwritten.
	local capuser=

	capfile[$if_name]=$(mktemp)
	capout[$if_name]=$(mktemp)

	# ns_cmd stays global on purpose: tcpdump_stop reuses it.
	if [[ -z "$ns" ]]; then
		ns_cmd=""
	else
		ns_cmd="ip netns exec ${ns}"
	fi

	if [[ -n "$SUDO_USER" ]]; then
		capuser="-Z $SUDO_USER"
	fi

	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
		-s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
		> "${capout[$if_name]}" 2>&1 &
	cappid[$if_name]=$!

	sleep 1
}
1462
# Stop the capture previously started for an interface.
#
# Arguments:
#   $1 - interface name; selects the PID stored in cappid
#
# The kill is prefixed with the global $ns_cmd (set by tcpdump_start). The
# process is only waited for when the kill succeeded.
tcpdump_stop()
{
	local iface=$1
	local cap_pid=${cappid[$iface]}

	if $ns_cmd kill "$cap_pid"; then
		wait "$cap_pid"
	fi
	sleep 1
}
1471
# Remove the temporary files recorded for an interface's capture.
#
# Arguments:
#   $1 - interface name; selects the entries in capfile and capout
tcpdump_cleanup()
{
	local if_name=$1
	local pcap=${capfile[$if_name]}
	local log=${capout[$if_name]}

	rm $pcap $log
}
1478
# Print the capture recorded for an interface by reading its capture file
# back with tcpdump. stderr is merged into stdout.
#
# Arguments:
#   $1 - interface name; selects the entry in capfile
tcpdump_show()
{
	local iface=$1
	local pcap=${capfile[$iface]}

	tcpdump -e -n -r $pcap 2>&1
}
1485
# Send one UDP packet from `host1_if` and report whether `host2_if` saw it.
#
# Arguments:
#   $1 - destination MAC of the packet (also the MAC the filter matches on)
#   $2 - source IP
#   $3 - destination IP; its format selects IPv4 or IPv6 handling
#   $4 - interface the packet is sent from
#   $5 - interface on which reception is checked
#
# Returns: 0 if the packet wasn't seen on host2_if or 1 if it was.
mcast_packet_test()
{
	local mac=$1 src_ip=$2 ip=$3 host1_if=$4 host2_if=$5
	local tc_proto mz_v6arg
	local seen=0

	# Basic check: anything that does not look like a dotted-quad IPv4
	# address is assumed to be IPv6.
	if [[ $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
		tc_proto="ip"
		mz_v6arg=""
	else
		tc_proto="ipv6"
		mz_v6arg="-6"
	fi

	# Install a drop filter on `host2_if`; its packet counter tells us
	# whether the packet arrived there.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 \
		handle 101 flower ip_proto udp dst_mac $mac action drop

	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip \
		-t udp "dp=4096,sp=2048" -q
	sleep 1

	# Exactly one hit on filter 101 means the packet was received.
	if tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	then
		seen=1
	fi

	# Tear the filter and the ingress qdisc down again.
	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 \
		handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $seen
}
1525
# Check the bridge MDB of br0 for $TEST_GROUP after a membership report.
#
# Arguments:
#   $1 - report name, used only in the failure message
#   $2... - source addresses expected for the group
#
# Two checks are made, each reported through check_err:
#   - an entry for $TEST_GROUP exists with a non-null source list that
#     contains every given source address;
#   - for every given source, an entry with that `src` exists.
brmcast_check_sg_entries()
{
	local report=$1; shift
	local slist=("$@")
	local sarg=""
	# Loop variables are local so same-named variables of the caller are
	# not overwritten.
	local src sgent

	# Build one " and .source_list[].address == ..." clause per source.
	for src in "${slist[@]}"; do
		sarg="${sarg} and .source_list[].address == \"$src\""
	done
	bridge -j -d -s mdb show dev br0 \
		| jq -e ".[].mdb[] | \
			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
	check_err $? "Wrong *,G entry source list after $report report"

	for sgent in "${slist[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
	done
}
1547
# Check whether traffic from each given source to $TEST_GROUP is forwarded.
#
# Arguments:
#   $1 - 1 if the traffic is expected to be forwarded, anything else if not
#   $2... - source addresses to test
#
# For every source one packet is sent with mcast_packet_test from $h2 and
# looked for on $h1; mcast_packet_test returns 1 when the packet was seen.
# The result is handed to check_fail when forwarding is expected and to
# check_err otherwise.
brmcast_check_sg_fwding()
{
	local should_fwd=$1; shift
	local sources=("$@")
	# Loop variable is local so a same-named variable of the caller is
	# not overwritten.
	local src

	for src in "${sources[@]}"; do
		local retval=0

		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
		retval=$?
		if [ $should_fwd -eq 1 ]; then
			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
		else
			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
		fi
	done
}
1565
# Check the blocked state of S,G entries for $TEST_GROUP in br0's MDB.
#
# Arguments:
#   $1 - 1 if the sources are expected to be blocked, anything else if not
#   $2... - source addresses to check
#
# For every source two jq queries are run: one looking for the source in
# the group's source list with timer "0.00", one looking for an S,G entry
# carrying the "blocked" flag. Each jq exit status is passed to
# check_err_fail together with should_fail, which is 0 when the sources are
# expected to be blocked and 1 otherwise.
# NOTE(review): check_err_fail is defined outside this view; presumably a
# should_fail of 1 means the query is expected to fail - confirm there.
brmcast_check_sg_state()
{
	local is_blocked=$1; shift
	local sources=("$@")
	local should_fail=1
	# Loop variable is local so a same-named variable of the caller is
	# not overwritten.
	local src

	if [ $is_blocked -eq 1 ]; then
		should_fail=0
	fi

	for src in "${sources[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
				 .source_list[] |
				 select(.address == \"$src\") |
				 select(.timer == \"0.00\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has zero timer"

		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
				 .flags[] == \"blocked\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has blocked flag"
	done
}
1592
# Join a multicast group on an interface by running mreceive in the
# background.
#
# Arguments:
#   $1 - interface to join on
#   $2 - multicast group
#
# mreceive is run through "ip vrf exec" with the name returned by
# master_name_get for the interface. Its PID is stored in the global
# mreceive_pid, which mc_leave uses to stop it.
mc_join()
{
	local if_name=$1
	local group=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	# Actual reception is irrelevant here; the point is joining the IP
	# multicast group, which also adds the L2 address to the device's
	# MAC filtering table.
	ip vrf exec $vrf_name mreceive -g $group -I $if_name &> /dev/null &
	mreceive_pid=$!

	sleep 1
}
1608
# Stop the process recorded in the global mreceive_pid (set by mc_join) and
# reap it. Returns kill's status if the kill fails, otherwise the status of
# the wait.
mc_leave()
{
	kill "$mreceive_pid" || return
	wait "$mreceive_pid"
}
1613
# Send one multicast packet with msend (-c 1), discarding its output.
#
# Arguments:
#   $1 - interface to send from
#   $2 - value passed to msend's -g option
#
# msend is run through "ip vrf exec" with the name returned by
# master_name_get for the interface.
mc_send()
{
	local if_name=$1
	local groups=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name msend -g $groups -I $if_name -c 1 &> /dev/null
}
1623
# Start "<ip> monitor <type>" in the background, writing to a temp file.
#
# Arguments:
#   $1 - monitor type, passed to "monitor"
#   $2 - optional command to use instead of "ip"
#
# Output: one line "<pid> <tmpfile>" on stdout, in the argument order
# stop_ip_monitor takes.
#
# Note: tmpfile and mpid are not declared local, so they are assigned in the
# caller's scope unless the function runs inside a command substitution.
start_ip_monitor()
{
	local mtype=$1 ip=${2-ip}

	# start the monitor in the background
	tmpfile=$(mktemp /var/run/nexthoptestXXX)
	mpid=$( ($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null )
	sleep 0.2
	printf '%s %s\n' "$mpid" "$tmpfile"
}
1635
# Stop a monitor started by start_ip_monitor and check how many events it
# recorded.
#
# Arguments:
#   $1 - PID of the monitor process
#   $2 - file the monitor wrote to; removed at the end
#   $3 - expected number of events
#   $4 - description used as the prefix of the failure message
#
# Only lines that begin with a word character are counted. The comparison
# result is reported through check_err.
stop_ip_monitor()
{
	local mpid=$1 tmpfile=$2 el=$3 what=$4
	local lines

	sleep 0.2
	kill $mpid
	lines=$(grep '^\w' $tmpfile | wc -l)
	test $lines -eq $el
	check_err $? "$what: $lines lines of events, expected $el"
	rm -rf $tmpfile
}
1650
# Check that toggling and (un)offloading "<type>_stats" on a device each
# produce exactly one "stats" monitor event.
#
# Arguments:
#   $1 - device
#   $2 - stats type; "<type>_stats" is what gets switched on and off
#   $3 - command run to trigger the "installation" notification
#   $4 - command run to trigger the "deinstallation" notification
#   $5 - optional command to use instead of "ip"
#
# Resets RET to 0 and finishes with log_test. Each of the four steps is
# wrapped in start_ip_monitor / stop_ip_monitor with an expected count of 1.
hw_stats_monitor_test()
{
	local dev=$1 type=$2 make_suitable=$3 make_unsuitable=$4
	local ip=${5-ip}
	local ipmout

	RET=0

	# Step 1: enabling the stats should be notified.
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats on
	stop_ip_monitor $ipmout 1 "${type}_stats enablement"

	# Step 2: so should the stats getting offloaded.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_suitable
	stop_ip_monitor $ipmout 1 "${type}_stats installation"

	# Step 3: ... and the offload being lost again.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_unsuitable
	stop_ip_monitor $ipmout 1 "${type}_stats deinstallation"

	# Step 4: finally, disabling the stats should be notified.
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats off
	stop_ip_monitor $ipmout 1 "${type}_stats disablement"

	log_test "${type}_stats notifications"
}
1683