1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4##############################################################################
5# Defines
6
# Exit code the kselftest framework treats as "skipped".
ksft_skip=4

# Tunables. A value that is already set (and non-empty) is kept; otherwise
# the default below applies. The configuration file may override them too.
: "${PING:=ping}"
: "${PING6:=ping6}"
: "${MZ:=mausezahn}"
: "${ARPING:=arping}"
: "${TEAMD:=teamd}"
: "${WAIT_TIME:=5}"
: "${PAUSE_ON_FAIL:=no}"
: "${PAUSE_ON_CLEANUP:=no}"
: "${NETIF_TYPE:=veth}"
: "${NETIF_CREATE:=yes}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
: "${PING_COUNT:=10}"
: "${PING_TIMEOUT:=5}"
: "${WAIT_TIMEOUT:=20}"
: "${INTERFACE_TIMEOUT:=600}"
: "${LOW_AGEING_TIME:=1000}"
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
: "${STABLE_MAC_ADDRS:=no}"
: "${TCPDUMP_EXTRA_FLAGS:=}"
33
# Directory part of the path this file was sourced from. A BASH_SOURCE
# without any slash is left unchanged by the expansion, which means the
# file lives in the current directory.
relative_path="${BASH_SOURCE%/*}"
case "$relative_path" in
"${BASH_SOURCE}") relative_path="." ;;
esac

# Optional per-setup overrides living next to this file.
if [[ -f "$relative_path/forwarding.config" ]]; then
	source "$relative_path/forwarding.config"
fi
42
43##############################################################################
44# Sanity checks
45
# Exit with the kselftest skip code when "tc -j" fails, i.e. the installed
# tc has no JSON support.
check_tc_version()
{
	if ! tc -j &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing JSON support"
		exit $ksft_skip
	fi
}
54
# Old versions of tc don't understand "mpls_uc". Probe for it by adding and
# then removing a throwaway matchall filter on the given device.
#   $1 - network device to probe
# Returns $ksft_skip (after printing a SKIP line) when the filter cannot be
# added, otherwise the status of the filter removal.
check_tc_mpls_support()
{
	local netdev=$1; shift

	if ! tc filter add dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		matchall action pipe &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing MPLS support"
		return $ksft_skip
	fi

	tc filter del dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		matchall
}
69
# Old versions of tc produce invalid json output for mpls lse statistics.
# Install a flower filter matching on MPLS LSE depth 2, feed the JSON dump of
# it to jq, then remove the filter again.
#   $1 - network device to probe
# Returns $ksft_skip (after printing a SKIP line) when the filter cannot be
# installed or when jq rejects the dump.
check_tc_mpls_lse_stats()
{
	local netdev=$1; shift
	local json_rc

	if ! tc filter add dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		flower mpls lse depth 2                                 \
		action continue &> /dev/null; then
		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
		return $ksft_skip
	fi

	# Remember jq's verdict before the cleanup command overwrites $?.
	tc -j filter show dev $netdev ingress protocol mpls_uc | jq . &> /dev/null
	json_rc=$?
	tc filter del dev $netdev ingress protocol mpls_uc pref 1 handle 1 \
		flower

	if ((json_rc != 0)); then
		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
		return $ksft_skip
	fi
}
95
# Exit with the skip code unless "tc filter help" mentions "block".
check_tc_shblock_support()
{
	if ! tc filter help 2>&1 | grep block &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing shared block support"
		exit $ksft_skip
	fi
}
104
# Exit with the skip code unless "tc help" mentions "chain".
check_tc_chain_support()
{
	if ! tc help 2>&1 | grep chain &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing chain support"
		exit $ksft_skip
	fi
}
113
# Exit with the skip code unless "tc actions help" mentions "hw_stats".
check_tc_action_hw_stats_support()
{
	if ! tc actions help 2>&1 | grep -q hw_stats; then
		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
		exit $ksft_skip
	fi
}
122
# Exit with the skip code unless "ethtool --help" mentions "lanes".
check_ethtool_lanes_support()
{
	if ! ethtool --help 2>&1 | grep lanes &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing lanes support"
		exit $ksft_skip
	fi
}
131
# Return $ksft_skip (after printing a SKIP line) unless the output of
# "bridge -d link show" contains " locked".
check_locked_port_support()
{
	bridge -d link show | grep -q " locked" && return 0

	echo "SKIP: iproute2 too old; Locked port feature not supported."
	return $ksft_skip
}
139
# Return $ksft_skip (after printing a SKIP line) unless the output of
# "bridge -d link show" contains "mab".
check_port_mab_support()
{
	bridge -d link show | grep -q "mab" && return 0

	echo "SKIP: iproute2 too old; MacAuth feature not supported."
	return $ksft_skip
}
147
# Skip the whole run unless we are root.
[[ "$(id -u)" -eq 0 ]] || {
	echo "SKIP: need root privileges"
	exit $ksft_skip
}

# The importer opts in to the tc JSON check by setting CHECK_TC=yes.
case "$CHECK_TC" in
yes) check_tc_version ;;
esac
156
# Exit with the skip code unless "command -v" resolves the given name to an
# executable file.
#   $1 - command name
require_command()
{
	local tool=$1; shift
	local tool_path

	tool_path=$(command -v "$tool")
	[[ -x "$tool_path" ]] && return 0

	echo "SKIP: $tool not installed"
	exit $ksft_skip
}
166
# Skip early when a tool that the importer says it needs is not installed.
[[ "$REQUIRE_JQ" != "yes" ]] || require_command jq
[[ "$REQUIRE_MZ" != "yes" ]] || require_command $MZ
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
	# https://github.com/vladimiroltean/mtools/
	# patched for IPv6 support
	require_command msend
	require_command mreceive
fi

# The importer has to declare how many interfaces it needs.
[[ -v NUM_NETIFS ]] || {
	echo "SKIP: importer does not define \"NUM_NETIFS\""
	exit $ksft_skip
}
184
185##############################################################################
186# Command line options handling
187
# Interface names given on the command line replace whatever NETIFS held
# before: the N-th argument becomes NETIFS[pN].
count=0

if (($# > 0)); then
	unset NETIFS
	declare -A NETIFS
fi

while (($# > 0)); do
	count=$((count + 1))
	NETIFS[p$count]="$1"
	shift
done
199
200##############################################################################
201# Network interfaces configuration
202
# Walk NETIFS in pairs (p1/p2, p3/p4, ...) and create a veth pair for every
# pair whose first interface does not exist yet. Exits with status 1 when a
# pair cannot be created.
create_netif_veth()
{
	local first second

	for ((first = 1; first <= NUM_NETIFS; first += 2)); do
		second=$((first + 1))

		if ip link show dev ${NETIFS[p$first]} &> /dev/null; then
			continue
		fi

		if ! ip link add ${NETIFS[p$first]} type veth \
			peer name ${NETIFS[p$second]}; then
			echo "Failed to create netif"
			exit 1
		fi
	done
}
222
# Create the test interfaces according to NETIF_TYPE. Only "veth" is
# supported; any other type prints an error and exits with status 1.
create_netif()
{
	case "$NETIF_TYPE" in
	veth) create_netif_veth
	      ;;
	# Inside double quotes a backslash before ' is kept literally, so the
	# quotes must not be escaped or the message shows stray backslashes.
	*) echo "Can not create interfaces of type '$NETIF_TYPE'"
	   exit 1
	   ;;
	esac
}
233
# Original MAC address of each interface, keyed by interface name.
declare -A MAC_ADDR_ORIG

# Give interface NETIFS[pN] the address 00:01:02:03:04:NN (N printed as two
# hex digits) and remember its previous address in MAC_ADDR_ORIG so that
# mac_addr_restore can put it back.
mac_addr_prepare()
{
	local new_addr=
	local dev=
	# Keep the counter local so a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		new_addr=$(printf "00:01:02:03:04:%02x" $i)

		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
		# Strip quotes
		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
		ip link set dev $dev address $new_addr
	done
}
250
# Put back the MAC addresses recorded in MAC_ADDR_ORIG on every interface in
# NETIFS[p1..pNUM_NETIFS].
mac_addr_restore()
{
	local dev=
	# Keep the counter local so a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
	done
}
260
# Create the interfaces first when asked to.
case "$NETIF_CREATE" in
yes) create_netif ;;
esac

# Optionally switch the interfaces to fixed MAC addresses.
case "$STABLE_MAC_ADDRS" in
yes) mac_addr_prepare ;;
esac

# Every interface listed in NETIFS has to exist by now; skip otherwise.
for ((i = 1; i <= NUM_NETIFS; ++i)); do
	if ! ip link show dev ${NETIFS[p$i]} &> /dev/null; then
		echo "SKIP: could not find all required interfaces"
		exit $ksft_skip
	fi
done
276
277##############################################################################
278# Helpers
279
# Status to exit with once all tests ran; log_test sets it to 1 on a failure.
EXIT_STATUS=0
# Result of the test currently running; each test resets it to 0 on entry.
RET=0
284
# Record a failure: if no failure has been recorded yet (RET is 0) and the
# given status is non-zero, store the status in RET and the message in
# retmsg.
#   $1 - exit status to examine
#   $2 - message describing the failure
check_err()
{
	local status=$1
	local text=$2

	# Only the first failure of a test is kept.
	[[ $RET -eq 0 ]] || return 0
	[[ $status -ne 0 ]] || return 0

	RET=$status
	retmsg=$text
}
295
# Counterpart of check_err for commands that are expected to fail: if no
# failure has been recorded yet (RET is 0) and the given status is zero, set
# RET to 1 and store the message in retmsg.
#   $1 - exit status to examine
#   $2 - message describing the unexpected success
check_fail()
{
	local status=$1
	local text=$2

	# Only the first failure of a test is kept.
	[[ $RET -eq 0 ]] || return 0
	[[ $status -eq 0 ]] || return 0

	RET=1
	retmsg=$text
}
306
# Dispatch to check_fail or check_err depending on whether the command was
# supposed to fail.
#   $1 - non-zero (arithmetically true) if failure was expected
#   $2 - exit status to examine
#   $3 - short description of what was run, used to build the message
check_err_fail()
{
	local expect_failure=$1
	local status=$2
	local action=$3

	if ((expect_failure)); then
		check_fail $status "$action succeeded, but should have failed"
	else
		check_err $status "$action failed"
	fi
}
319
# Print the verdict of the test that just ran, based on the global RET.
#   $1 - test name
#   $2 - optional qualifier; wrapped in parentheses when exactly two
#        arguments are given
# On failure (RET non-zero): sets EXIT_STATUS to 1, prints the [FAIL] line
# followed by retmsg if that is non-empty, optionally waits for the user
# (PAUSE_ON_FAIL=yes; answering "q" exits with status 1) and returns 1.
# Otherwise prints the [ OK ] line and returns 0.
log_test()
{
	local test_name=$1
	local opt_str=$2
	# The reply is kept local so that pausing cannot overwrite a variable of
	# the calling test.
	local answer

	if [[ $# -eq 2 ]]; then
		opt_str="($opt_str)"
	fi

	if [[ $RET -ne 0 ]]; then
		EXIT_STATUS=1
		printf "TEST: %-60s  [FAIL]\n" "$test_name $opt_str"
		if [[ -n "$retmsg" ]]; then
			printf "\t%s\n" "$retmsg"
		fi
		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
			echo "Hit enter to continue, 'q' to quit"
			read -r answer
			[ "$answer" = "q" ] && exit 1
		fi
		return 1
	fi

	printf "TEST: %-60s  [ OK ]\n" "$test_name $opt_str"
	return 0
}
346
# Print a [SKIP] line for a test.
#   $1 - test name
#   $2 - optional qualifier appended after a space
# Always returns 0.
log_test_skip()
{
	local label="$1 $2"

	printf "TEST: %-60s  [SKIP]\n" "$label"
	return 0
}
355
# Print an informational line.
#   $1 - message
log_info()
{
	echo "INFO: $1"
}
362
# Run a command over and over until it succeeds or the time budget is used
# up. The output of the last attempt is printed without a trailing newline.
#   $1     - time budget in milliseconds
#   $2...  - command to run (executed in a command substitution)
# Returns 0 as soon as the command succeeds, 1 once the budget is exceeded.
busywait()
{
	local budget_ms=$1; shift
	local began now output

	began=$(date -u +%s%3N)
	until output=$("$@"); do
		now=$(date -u +%s%3N)
		if ((now - began > budget_ms)); then
			echo -n "$output"
			return 1
		fi
	done

	echo -n "$output"
	return 0
}
385
# Run the given command and invert its result: succeed when it fails and
# fail when it succeeds.
not()
{
	! "$@"
}
391
# Print the largest of the integers given as arguments. With no arguments an
# empty line is printed.
get_max()
{
	# Both variables are local so that callers owning a "max" or "cur"
	# variable are not affected.
	local max=$1
	local cur

	for cur in "$@"; do
		if [[ $cur -gt $max ]]; then
			max=$cur
		fi
	done

	echo $max
}
405
# Run a command and filter its output for FDB entries.
#   $1    - address to look for
#   $2    - optional "self" or "master": keep only lines containing that word
#   $3    - optional "-v" (only after $2): keep only lines NOT containing it
#   rest  - command producing the FDB listing
# The exit status is that of the final grep.
grep_bridge_fdb()
{
	local addr=$1; shift
	local word
	local flag

	case "$1" in
	self|master)
		word=$1; shift
		if [[ "$1" == "-v" ]]; then
			flag=$1; shift
		fi
		;;
	esac

	# $flag is left unquoted on purpose: when empty it has to vanish rather
	# than become an empty argument to grep.
	$@ | grep $addr | grep $flag "$word"
}
421
# Succeed if the output of the given command contains "Link detected: yes".
wait_for_port_up()
{
	local needle="Link detected: yes"

	"$@" | grep -q "$needle"
}
426
# Succeed if the output of the given command contains "offload".
wait_for_offload()
{
	local needle=offload

	"$@" | grep -q "$needle"
}
431
# Succeed if the output of the given command contains "trap".
wait_for_trap()
{
	local needle=trap

	"$@" | grep -q "$needle"
}
436
# Read a counter and compare it against a condition.
#   $1    - right-hand part of an arithmetic comparison, e.g. ">= 10"
#   rest  - command printing the current counter value
# Prints the counter value and succeeds if "<value> <condition>" holds.
until_counter_is()
{
	local condition=$1; shift
	local value

	value=$("$@")

	echo $((value))
	((value $condition))
}
445
# Busy-wait until a counter has grown by at least a given amount.
#   $1    - timeout handed on to busywait
#   $2    - required increase over the value read at entry
#   rest  - command printing the current counter value
busywait_for_counter()
{
	local timeout=$1; shift
	local delta=$1; shift
	local baseline target

	baseline=$("$@")
	target=$((baseline + delta))
	busywait "$timeout" until_counter_is ">= $target" "$@"
}
454
# Wait for a device to come up, giving it INTERFACE_TIMEOUT iterations. If it
# does not, log a failed "setup_wait_dev" test and exit with status 1.
#   $1 - device name
#   $2 - optional settle time passed on to the waiter (default: WAIT_TIME)
setup_wait_dev()
{
	local dev=$1; shift
	local wait_time=${1:-$WAIT_TIME}; shift

	if ! setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time; then
		check_err 1
		log_test setup_wait_dev ": Interface $dev does not come up."
		exit 1
	fi
}
468
# Poll a device, sleeping one second between attempts, until "ip link show"
# reports "state UP" for it.
#   $1 - device name
#   $2 - maximum number of attempts (default: WAIT_TIMEOUT)
#   $3 - time to sleep once the device is up (default: WAIT_TIME)
# Returns 0 when the device came up, 1 when all attempts were used.
setup_wait_dev_with_timeout()
{
	local dev=$1; shift
	local max_iterations=${1:-$WAIT_TIMEOUT}; shift
	local wait_time=${1:-$WAIT_TIME}; shift
	local attempt

	for ((attempt = 1; attempt <= $max_iterations; ++attempt)); do
		if ip link show dev $dev up \
			| grep 'state UP' &> /dev/null; then
			sleep $wait_time
			return 0
		fi
		sleep 1
	done

	return 1
}
489
# Wait for the first N interfaces of NETIFS to come up, then sleep WAIT_TIME
# once more.
#   $1 - number of interfaces to wait for (default: NUM_NETIFS)
setup_wait()
{
	local total=${1:-$NUM_NETIFS}
	local idx=1

	while ((idx <= total)); do
		setup_wait_dev ${NETIFS[p$idx]} 0
		idx=$((idx + 1))
	done

	# Make sure links are ready.
	sleep $WAIT_TIME
}
502
# Run a command and filter its output through "jq -r".
#   $1 - command line to run (word-split by the shell)
#   $2 - jq expression
#   $3 - optional extra jq options
# Prints the filtered output. Returns the status of the command or of jq if
# either fails; otherwise succeeds only if the filtered output is non-empty.
cmd_jq()
{
	local cmd=$1
	local jq_exp=$2
	local jq_opts=$3
	local raw
	local filtered

	# If the command fails, return its status right away.
	raw="$($cmd)" || return $?

	filtered=$(echo $raw | jq -r $jq_opts "$jq_exp") || return $?

	echo $filtered
	# Return success only in case of non-empty output.
	[[ -n "$filtered" ]]
}
526
# Block until lldptool no longer reports "pending" or "unknown" for the APP
# TLV of the given device, checking every five seconds.
#   $1 - device name
lldpad_app_wait_set()
{
	local dev=$1; shift

	while true; do
		lldptool -t -i $dev -V APP -c app \
			| grep -Eq "pending|unknown" || break
		echo "$dev: waiting for lldpad to push pending APP updates"
		sleep 5
	done
}
536
lldpad_app_wait_del()
{
	# lldpad needs a moment to push deletions down. Once removed from its
	# DB, a still-pending update can no longer be detected, so if the
	# device goes down too early the update stays pending and the next test
	# iteration sees new APP rules clash with the stale ones, sometimes
	# ending up in an "unknown" state. All we can do is wait a fixed time.
	local grace=5

	sleep $grace
}
547
# Hook to run before a test tears its setup down: optionally waits for the
# user (PAUSE_ON_CLEANUP=yes) and restores the original MAC addresses when
# STABLE_MAC_ADDRS=yes.
pre_cleanup()
{
	if [[ ${PAUSE_ON_CLEANUP} == "yes" ]]; then
		echo "Pausing before cleanup, hit any key to continue"
		read
	fi

	case "$STABLE_MAC_ADDRS" in
	yes) mac_addr_restore ;;
	esac
}
559
# For IPv4 and then IPv6: add a rule for table "local" at preference 32765
# and delete the rule at preference 0.
vrf_prepare()
{
	local family

	for family in -4 -6; do
		ip $family rule add pref 32765 table local
		ip $family rule del pref 0
	done
}
567
# Undo vrf_prepare. For IPv6 and then IPv4: add the rule for table "local"
# back at preference 0 and delete the one at preference 32765.
vrf_cleanup()
{
	local family

	for family in -6 -4; do
		ip $family rule add pref 0 table local
		ip $family rule del pref 32765
	done
}
575
# Last routing table id handed out, and the id assigned to each VRF name.
__last_tb_id=0
declare -A __TB_IDS

# Hand out the next table id to a VRF and record it in __TB_IDS.
#   $1 - VRF name
# The id is passed back as the function's return status.
# NOTE(review): an exit status only carries 8 bits, so ids above 255 do not
# survive this path - confirm that no test creates that many VRFs.
__vrf_td_id_assign()
{
	local vrf_name=$1

	__TB_IDS[$vrf_name]=$((++__last_tb_id))
	return $__last_tb_id
}
587
# Look up the table id recorded for a VRF in __TB_IDS and pass it back as
# the function's return status.
#   $1 - VRF name
__vrf_td_id_lookup()
{
	local tb_id=${__TB_IDS[$1]}

	return $tb_id
}
594
# Create a VRF device bound to a freshly assigned routing table and install
# unreachable default routes (IPv4 and IPv6) in that table.
#   $1 - VRF device name
vrf_create()
{
	local vrf_name=$1
	local tb_id

	__vrf_td_id_assign $vrf_name
	# Read the id from the table rather than from $?: an exit status is
	# truncated to 8 bits, which would yield a wrong table id once more
	# than 255 VRFs have been created.
	tb_id=${__TB_IDS[$vrf_name]}

	ip link add dev $vrf_name type vrf table $tb_id
	ip -4 route add table $tb_id unreachable default metric 4278198272
	ip -6 route add table $tb_id unreachable default metric 4278198272
}
607
# Remove the unreachable default routes from the VRF's routing table and
# delete the VRF device.
#   $1 - VRF device name
vrf_destroy()
{
	local vrf_name=$1
	local tb_id

	# Read the id from the table rather than through an exit status, which
	# is truncated to 8 bits. An unknown VRF maps to 0, which is what the
	# status-based lookup produced as well.
	tb_id=${__TB_IDS[$vrf_name]:-0}

	ip -6 route del table $tb_id unreachable default metric 4278198272
	ip -4 route del table $tb_id unreachable default metric 4278198272
	ip link del dev $vrf_name
}
620
# Add or delete a list of addresses on an interface.
#   $1    - interface name
#   $2    - "add" or "del" (passed to "ip address" as is)
#   rest  - addresses; each may carry further "ip address" arguments, since
#           it is expanded unquoted
__addr_add_del()
{
	local if_name=$1
	local add_del=$2
	# Local, so the loop does not overwrite a caller's variable.
	local addrstr

	shift
	shift

	for addrstr in "$@"; do
		ip address $add_del $addrstr dev $if_name
	done
}
635
# Enslave an interface to a VRF, bring it up and add addresses to it.
#   $1    - interface name
#   $2    - VRF device name
#   rest  - addresses to add
__simple_if_init()
{
	local netdev=$1; shift
	local vrf=$1; shift

	ip link set dev $netdev master $vrf
	ip link set dev $netdev up

	__addr_add_del $netdev add "$@"
}
647
# Reverse of __simple_if_init: remove the addresses, bring the interface
# down and release it from its master.
#   $1    - interface name
#   rest  - addresses to delete
__simple_if_fini()
{
	local netdev=$1; shift

	__addr_add_del $netdev del "$@"

	ip link set dev $netdev down
	ip link set dev $netdev nomaster
}
658
# Create a VRF named "v<interface>", bring it up, and put the interface into
# it with the given addresses.
#   $1    - interface name
#   rest  - addresses to add
simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	vrf_create $vrf_name
	ip link set dev $vrf_name up
	__simple_if_init $if_name $vrf_name "$@"
}
673
# Reverse of simple_if_init: deconfigure the interface and destroy the VRF
# named "v<interface>".
#   $1    - interface name
#   rest  - addresses to delete
simple_if_fini()
{
	local if_name=$1; shift

	__simple_if_fini $if_name "$@"
	vrf_destroy v$if_name
}
687
# Create a tunnel device and bring it up.
#   $1    - device name
#   $2    - link type
#   $3    - local endpoint address
#   $4    - remote endpoint address
#   rest  - extra arguments appended to "ip link add"
tunnel_create()
{
	local tun_name=$1; shift
	local tun_type=$1; shift
	local local_ep=$1; shift
	local remote_ep=$1; shift

	ip link add name $tun_name type $tun_type \
	   local $local_ep remote $remote_ep "$@"
	ip link set dev $tun_name up
}
699
# Delete a tunnel device.
#   $1 - device name
tunnel_destroy()
{
	ip link del dev $1
}
706
# Create VLAN device "<interface>.<vid>", optionally enslave it to a VRF,
# bring it up and add addresses to it.
#   $1    - parent interface name
#   $2    - VLAN id
#   $3    - VRF device name, or "" for none
#   rest  - addresses to add
vlan_create()
{
	local parent=$1; shift
	local vid=$1; shift
	local vrf=$1; shift
	local vlan_dev=$parent.$vid

	ip link add name $vlan_dev link $parent type vlan id $vid
	if [[ -n "$vrf" ]]; then
		ip link set dev $vlan_dev master $vrf
	fi
	ip link set dev $vlan_dev up
	__addr_add_del $vlan_dev add "$@"
}
722
# Delete VLAN device "<interface>.<vid>".
#   $1 - parent interface name
#   $2 - VLAN id
vlan_destroy()
{
	ip link del dev $1.$2
}
731
# Create a team device with teamd and enslave the given ports to it.
#   $1    - team device name
#   $2    - runner name placed into the teamd JSON configuration
#   rest  - ports to enslave; each is brought down, enslaved and brought up
# Skips the run (through require_command) when teamd is not installed.
team_create()
{
	local if_name=$1; shift
	local mode=$1; shift
	# Local, so the loop does not overwrite a caller's variable.
	local slave

	require_command $TEAMD
	$TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
	for slave in "$@"; do
		ip link set dev $slave down
		ip link set dev $slave master $if_name
		ip link set dev $slave up
	done
	ip link set dev $if_name up
}
746
# Run teamd with -k for the given team device.
#   $1 - team device name
team_destroy()
{
	local team_dev=$1; shift

	$TEAMD -t $team_dev -k
}
753
# Print the "master" field from the JSON link description of an interface.
#   $1 - interface name
master_name_get()
{
	local netdev=$1

	ip -j link show dev $netdev \
		| jq -r '.[]["master"]'
}
760
# Print one stats64 counter of an interface.
#   $1 - interface name
#   $2 - key below "stats64" (callers in this file pass "rx" or "tx")
#   $3 - counter name below that key
link_stats_get()
{
	local netdev=$1; shift
	local direction=$1; shift
	local counter=$1; shift

	ip -j -s link show dev $netdev |
		jq '.[]["stats64"]["'$direction'"]["'$counter'"]'
}
770
# Print the "tx" "packets" counter of an interface.
#   $1 - interface name
link_stats_tx_packets_get()
{
	local direction=tx counter=packets

	link_stats_get $1 $direction $counter
}
775
# Print the "rx" "errors" counter of an interface.
#   $1 - interface name
link_stats_rx_errors_get()
{
	local direction=rx counter=errors

	link_stats_get $1 $direction $counter
}
780
# Print a statistic of the actions of the tc filter with the given
# preference.
#   $1 - device name
#   $2 - filter preference
#   $3 - direction (default: ingress)
#   $4 - jq path below ".stats" (default: .packets)
tc_rule_stats_get()
{
	local dev=$1; shift
	local pref=$1; shift
	local dir=${1:-ingress}; shift
	local selector=${1:-.packets}; shift

	tc -j -s filter show dev $dev $dir pref $pref |
		jq ".[1].options.actions[].stats$selector"
}
791
# Print a statistic of the first action of the tc filter with the given
# handle.
#   $1 - filter location handed to "tc filter show" (expanded unquoted)
#   $2 - filter handle to select
#   $3 - jq path below ".stats" (default: .packets)
tc_rule_handle_stats_get()
{
	local where=$1; shift
	local handle=$1; shift
	local selector=${1:-.packets}; shift

	tc -j -s filter show $where |
		jq ".[] | select(.options.handle == $handle) | \
		  .options.actions[0].stats$selector"
}
802
# Print the value of one "ethtool -S" statistic: everything after the first
# colon on the first line whose name matches.
#   $1 - device name
#   $2 - statistic name (used inside a grep pattern)
ethtool_stats_get()
{
	local netdev=$1; shift
	local counter=$1; shift

	ethtool -S $netdev |
		grep "^ *$counter:" |
		head -n 1 |
		cut -d: -f2
}
810
# Evaluate a jq selector on the qdisc of a device that has the given handle.
#   $1 - device name
#   $2 - qdisc handle to select
#   $3 - jq expression applied to the selected qdisc object
qdisc_stats_get()
{
	local netdev=$1; shift
	local qdisc_handle=$1; shift
	local jq_path=$1; shift

	tc -j -s qdisc show dev "$netdev" |
		jq '.[] | select(.handle == "'"$qdisc_handle"'") | '"$jq_path"
}
820
# Evaluate a jq selector on the qdisc of a device that has the given parent.
# The listing is requested with "invisible".
#   $1 - device name
#   $2 - parent to select
#   $3 - jq expression applied to the selected qdisc object
qdisc_parent_stats_get()
{
	local netdev=$1; shift
	local qdisc_parent=$1; shift
	local jq_path=$1; shift

	tc -j -s qdisc show dev "$netdev" invisible |
		jq '.[] | select(.parent == "'"$qdisc_parent"'") | '"$jq_path"
}
830
# Print the second tab-separated field of the lines in
# /proc/net/dev_snmp6/<device> that start with the given name.
#   $1 - device name
#   $2 - statistic name (anchored at line start, so it acts as a prefix)
ipv6_stats_get()
{
	local netdev=$1; shift
	local counter=$1; shift

	grep "^$counter" /proc/net/dev_snmp6/$netdev | cut -f2
}
838
# Print one stats64 counter from the "offload" statistics group of an
# interface.
#   $1 - subgroup (suite) name
#   $2 - interface name
#   $3 - key below "stats64"
#   $4 - counter name below that key
hw_stats_get()
{
	local subgroup=$1; shift
	local netdev=$1; shift
	local direction=$1; shift
	local counter=$1; shift

	ip -j stats show dev $netdev group offload subgroup $subgroup \
		| jq ".[0].stats64.$direction.$counter"
}
849
# Print a bit rate with a unit suffix, dividing by 1024 per unit step
# (bps, Kbps, Mbps, Gbps). Divided values carry one decimal place.
#   $1 - rate in bits per second
humanize()
{
	local speed=$1; shift
	local units=(bps Kbps Mbps Gbps)
	local last=$((${#units[@]} - 1))
	local idx=0

	# Stop dividing at the largest unit. Otherwise a rate of 1024 Gbps or
	# more would be scaled down once more and still be labelled "Gbps".
	while ((idx < last)) && (($(echo "$speed >= 1024" | bc))); do
		speed=$(echo "scale=1; $speed / 1024" | bc)
		idx=$((idx + 1))
	done

	echo "$speed${units[idx]}"
}
864
# Print the rate between two counter samples: eight times the difference,
# divided by the interval (integer arithmetic).
#   $1 - first sample
#   $2 - second sample
#   $3 - interval between the samples
rate()
{
	local before=$1; shift
	local after=$1; shift
	local period=$1; shift
	local scaled=$((8 * (after - before)))

	echo $((scaled / period))
}
873
# Print the difference between two counter samples divided by the interval
# (integer arithmetic).
#   $1 - first sample
#   $2 - second sample
#   $3 - interval between the samples
packets_rate()
{
	local before=$1; shift
	local after=$1; shift
	local period=$1; shift
	local delta=$((after - before))

	echo $((delta / period))
}
882
# Print the "address" field from the JSON link description of an interface.
#   $1 - interface name
mac_get()
{
	local netdev=$1

	ip -j link show dev $netdev \
		| jq -r '.[]["address"]'
}
889
# Print the first address of an interface whose scope is "link".
#   $1 - interface name
ipv6_lladdr_get()
{
	local netdev=$1

	ip -j addr show dev $netdev |
		jq -r '.[]["addr_info"][] | select(.scope == "link").local' |
		head -1
}
898
# Print the ageing time of a bridge in seconds.
#   $1 - bridge device name
bridge_ageing_time_get()
{
	local bridge=$1
	local raw

	raw=$(ip -j -d link show dev $bridge \
	      | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')

	# The reported value has to be divided by 100 to obtain seconds.
	echo $((raw / 100))
}
909
# Previous value of every sysctl changed through sysctl_set, keyed by name.
declare -A SYSCTL_ORIG

# Set a sysctl after saving its current value for sysctl_restore.
#   $1 - sysctl key
#   $2 - new value
sysctl_set()
{
	local name=$1; shift
	local new_value=$1; shift

	SYSCTL_ORIG[$name]=$(sysctl -n $name)
	sysctl -qw $name=$new_value
}
919
# Write back the value that sysctl_set saved for a key.
#   $1 - sysctl key
sysctl_restore()
{
	local name=$1; shift
	local saved=${SYSCTL_ORIG["$name"]}

	sysctl -qw $name=$saved
}
926
# Set net.ipv4.conf.all.forwarding and net.ipv6.conf.all.forwarding to 1
# through sysctl_set, which saves the previous values.
forwarding_enable()
{
	local key

	for key in net.ipv4.conf.all.forwarding \
		   net.ipv6.conf.all.forwarding; do
		sysctl_set $key 1
	done
}
932
# Restore the forwarding sysctls changed by forwarding_enable, IPv6 first.
forwarding_restore()
{
	local key

	for key in net.ipv6.conf.all.forwarding \
		   net.ipv4.conf.all.forwarding; do
		sysctl_restore $key
	done
}
938
# Previous MTU of every device changed through mtu_set, keyed by name.
declare -A MTU_ORIG

# Change the MTU of a device after saving the current one for mtu_restore.
#   $1 - device name
#   $2 - new MTU
mtu_set()
{
	local netdev=$1; shift
	local new_mtu=$1; shift

	MTU_ORIG["$netdev"]=$(ip -j link show dev $netdev | jq -e '.[].mtu')
	ip link set dev $netdev mtu $new_mtu
}
948
# Set the MTU of a device back to the value saved by mtu_set.
#   $1 - device name
mtu_restore()
{
	local netdev=$1; shift
	local saved=${MTU_ORIG["$netdev"]}

	ip link set dev $netdev mtu $saved
}
955
# Check that "ethtool -k" reports "hw-tc-offload: on" for the first N
# interfaces of NETIFS.
#   $1 - number of interfaces to check (default: NUM_NETIFS)
# Returns 0 if all of them do, 1 at the first one that does not.
tc_offload_check()
{
	local num_netifs=${1:-$NUM_NETIFS}
	# Keep the counter local so a caller's own "i" is not clobbered.
	local i

	for ((i = 1; i <= num_netifs; ++i)); do
		if ! ethtool -k ${NETIFS[p$i]} \
			| grep "hw-tc-offload: on" &> /dev/null; then
			return 1
		fi
	done

	return 0
}
970
# Install a flower filter at preference 1 that traps packets in hardware,
# falling back to a plain "continue" filter when that cannot be added.
#   $1 - device name
#   $2 - direction (e.g. ingress)
trap_install()
{
	local netdev=$1; shift
	local dir=$1; shift

	# Not every device supports or needs in-hardware trapping (e.g. the
	# veth pairs this library creates for non-existent loopbacks). The
	# "continue" fallback still leaves a filter in place, which tests
	# checking counters rely on, and lets other filters be processed.
	if ! tc filter add dev $netdev $dir pref 1 \
		flower skip_sw action trap 2>/dev/null; then
		tc filter add dev $netdev $dir pref 1 \
			flower action continue
	fi
}
986
# Remove the preference-1 flower filter installed by trap_install.
#   $1 - device name
#   $2 - direction
trap_uninstall()
{
	local netdev=$1; shift
	local dir=$1; shift

	tc filter del dev $netdev $dir pref 1 flower
}
994
# Call trap_install with the given arguments, but only when the global
# tcflags contains "skip_hw".
slow_path_trap_install()
{
	# Slow-path testing needs a trap so that packets which would otherwise
	# be switched in HW reach the slow path.
	if [[ "$tcflags" == *skip_hw* ]]; then
		trap_install "$@"
	fi
}
1003
# Call trap_uninstall with the given arguments, but only when the global
# tcflags contains "skip_hw".
slow_path_trap_uninstall()
{
	if [[ "$tcflags" == *skip_hw* ]]; then
		trap_uninstall "$@"
	fi
}
1010
# Add or delete an ingress flower filter that matches ICMP or ICMPv6 and
# passes the packet.
#   $1 - "add" or "del"
#   $2 - filter preference
#   $3 - "" for IPv4/ICMP, "v6" for IPv6/ICMPv6
#   $4 - device name
#   $5 - additional flower match keys (expanded unquoted)
__icmp_capture_add_del()
{
	local verb=$1; shift
	local pref=$1; shift
	local vsuf=$1; shift
	local netdev=$1; shift
	local extra_match=$1; shift

	tc filter $verb dev "$netdev" ingress \
	   proto ip$vsuf pref $pref \
	   flower ip_proto icmp$vsuf $extra_match \
	   action pass
}
1024
# Add the ICMP capture filter at preference 100.
#   $1 - device name
#   $2 - optional additional flower match keys
icmp_capture_install()
{
	local pref=100

	__icmp_capture_add_del add $pref "" "$@"
}
1029
# Delete the ICMP capture filter at preference 100.
#   $1 - device name
#   $2 - optional additional flower match keys
icmp_capture_uninstall()
{
	local pref=100

	__icmp_capture_add_del del $pref "" "$@"
}
1034
# Add the ICMPv6 capture filter at preference 100.
#   $1 - device name
#   $2 - optional additional flower match keys
icmp6_capture_install()
{
	local pref=100

	__icmp_capture_add_del add $pref v6 "$@"
}
1039
# Delete the ICMPv6 capture filter at preference 100.
#   $1 - device name
#   $2 - optional additional flower match keys
icmp6_capture_uninstall()
{
	local pref=100

	__icmp_capture_add_del del $pref v6 "$@"
}
1044
# Add or delete an ingress flower filter for protocol 802.1q that passes the
# packet.
#   $1 - "add" or "del"
#   $2 - filter preference
#   $3 - device name
#   $4 - flower match keys (expanded unquoted)
__vlan_capture_add_del()
{
	local verb=$1; shift
	local pref=$1; shift
	local netdev=$1; shift
	local match=$1; shift

	tc filter $verb dev "$netdev" ingress \
	   proto 802.1q pref $pref \
	   flower $match \
	   action pass
}
1057
# Add the VLAN capture filter at preference 100.
#   $1 - device name
#   $2 - flower match keys
vlan_capture_install()
{
	local pref=100

	__vlan_capture_add_del add $pref "$@"
}
1062
# Delete the VLAN capture filter at preference 100.
#   $1 - device name
#   $2 - flower match keys
vlan_capture_uninstall()
{
	local pref=100

	__vlan_capture_add_del del $pref "$@"
}
1067
# Add or delete eight ICMP capture filters, one for each DSCP value from
# base to base + 7. The filter for DSCP d uses preference d + 100 and
# matches ip_tos d << 2 with skip_hw.
#   $1 - "add" or "del"
#   $2 - device name
#   $3 - first DSCP value
__dscp_capture_add_del()
{
	local add_del=$1; shift
	local dev=$1; shift
	local base=$1; shift
	local dscp
	# Local, so the loop does not overwrite a caller's variable.
	local prio

	for prio in {0..7}; do
		dscp=$((base + prio))
		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
				       "skip_hw ip_tos $((dscp << 2))"
	done
}
1081
# Add the DSCP capture filters for base .. base + 7.
#   $1 - device name
#   $2 - first DSCP value
dscp_capture_install()
{
	local netdev=$1; shift
	local first_dscp=$1; shift

	__dscp_capture_add_del add $netdev $first_dscp
}
1089
# Delete the DSCP capture filters for base .. base + 7.
#   $1 - device name
#   $2 - first DSCP value
dscp_capture_uninstall()
{
	local netdev=$1; shift
	local first_dscp=$1; shift

	__dscp_capture_add_del del $netdev $first_dscp
}
1097
# Print the counters of the eight DSCP capture filters, one "[dscp]=count "
# entry per line, so the output can serve as the body of an array
# assignment.
#   $1 - device name
#   $2 - first DSCP value
dscp_fetch_stats()
{
	local dev=$1; shift
	local base=$1; shift
	# All loop state is local so that a caller's variables stay untouched.
	local prio dscp t

	for prio in {0..7}; do
		dscp=$((base + prio))
		t=$(tc_rule_stats_get $dev $((dscp + 100)))
		echo "[$dscp]=$t "
	done
}
1109
# Add a clsact qdisc to a device and an ingress matchall filter at
# preference 10000 that drops every packet.
#   $1 - device name
matchall_sink_create()
{
	local netdev=$1; shift

	tc qdisc add dev $netdev clsact
	tc filter add dev $netdev ingress pref 10000 matchall action drop
}
1120
# Run every test named in TESTS, or in ALL_TESTS when TESTS is unset or
# empty. The list is split on whitespace and each word is run as a command.
tests_run()
{
	local test_fn

	for test_fn in ${TESTS:-$ALL_TESTS}; do
		$test_fn
	done
}
1129
# Compare the measured packet split over two paths with the configured
# weights and log the result as a test.
#   $1 - test description
#   $2 - weight of the first path (rp12)
#   $3 - weight of the second path (rp13)
#   $4 - packets seen on the first path
#   $5 - packets seen on the second path
# Both ratios are taken as larger-weight path over smaller-weight path. The
# test fails when either packet count is zero, or when the measured ratio
# deviates from the expected one by more than 15% of the expected ratio.
multipath_eval()
{
	local desc="$1"
	local weight_rp12=$2
	local weight_rp13=$3
	local packets_rp12=$4
	local packets_rp13=$5
	local weights_ratio packets_ratio diff

	RET=0

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
				| bc -l)
	else
		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
				| bc -l)
	fi

	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
	       check_err 1 "Packet difference is 0"
	       # Log under the caller's description, like the regular path
	       # below, so that a failure is attributed to the right test.
	       log_test "$desc"
	       log_info "Expected ratio $weights_ratio"
	       return
	fi

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
				| bc -l)
	else
		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
				| bc -l)
	fi

	# Absolute difference: strip a leading minus sign.
	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
	diff=${diff#-}

	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
	check_err $? "Too large discrepancy between expected and measured ratios"
	log_test "$desc"
	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
}
1172
# Run a command inside a network namespace. A bash started with
# "ip netns exec" is fed a script that sets NUM_NETIFS=0, sources lib.sh and
# then runs the command, each of whose words is quoted with printf %q.
#   $1    - namespace name
#   rest  - command and its arguments
in_ns()
{
	local name=$1; shift
	local quoted_cmd

	# Words are joined with the first character of IFS.
	quoted_cmd=$(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done)

	ip netns exec $name bash <<-EOF
		NUM_NETIFS=0
		source lib.sh
		$quoted_cmd
	EOF
}
1183
1184##############################################################################
1185# Tests
1186
# Ping an address from within the VRF that the interface is enslaved to,
# with all output discarded. Sends PING_COUNT packets at 0.1 s intervals with
# an overall deadline of PING_TIMEOUT.
#   $1 - interface whose master names the VRF to execute in
#   $2 - destination address
#   $3 - optional extra ping arguments (expanded unquoted)
# Returns the status of the ping command line.
ping_do()
{
	local netdev=$1
	local target=$2
	local extra=$3
	local vrf

	vrf=$(master_name_get $netdev)
	ip vrf exec $vrf \
		$PING $extra $target -c $PING_COUNT -i 0.1 \
		-w $PING_TIMEOUT &> /dev/null
}
1199
# Test case: ping an address and log the result as "ping<suffix>".
#   $1 - interface name
#   $2 - destination address
#   $3 - optional suffix for the test name
ping_test()
{
	local status

	RET=0

	ping_do $1 $2
	status=$?
	check_err $status
	log_test "ping$3"
}
1208
# IPv6 counterpart of ping_do: run $PING6 from within the VRF that the
# interface is enslaved to, with all output discarded.
#   $1 - interface whose master names the VRF to execute in
#   $2 - destination address
#   $3 - optional extra ping arguments (expanded unquoted)
# Returns the status of the ping command line.
ping6_do()
{
	local netdev=$1
	local target=$2
	local extra=$3
	local vrf

	vrf=$(master_name_get $netdev)
	ip vrf exec $vrf \
		$PING6 $extra $target -c $PING_COUNT -i 0.1 \
		-w $PING_TIMEOUT &> /dev/null
}
1221
# Test case: ping an IPv6 address and log the result as "ping6<suffix>".
#   $1 - interface name
#   $2 - destination address
#   $3 - optional suffix for the test name
ping6_test()
{
	local status

	RET=0

	ping6_do $1 $2
	status=$?
	check_err $status
	log_test "ping6$3"
}
1230
# Test case: FDB learning on a bridge port.
#   $1 - bridge device
#   $2 - bridge port under test; connected to `host1_if`
#   $3 - host interface behind that port
#   $4 - second host interface, used to send towards the learned address
# Steps: verify the test MAC is unknown and not forwarded while flooding is
# off, let the bridge learn it from host1, verify it is then forwarded to
# host1, verify the entry ages out, and verify nothing is learned while
# learning is off. The port and host settings changed on the way are put
# back before the result is logged as "FDB learning".
learning_test()
{
	local bridge=$1
	local br_port1=$2	# Connected to `host1_if`.
	local host1_if=$3
	local host2_if=$4
	local mac=de:ad:be:ef:13:37
	local ageing_time

	RET=0

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Disable unknown unicast flooding on `br_port1` to make sure
	# packets are only forwarded through the port after a matching
	# FDB entry was installed.
	bridge link set dev $br_port1 flood off

	# Count packets for the test MAC arriving at host1.
	ip link set $host1_if promisc on
	tc qdisc add dev $host1_if ingress
	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	# Nothing has been learned yet, so this packet must not show up.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	check_fail $? "Packet reached first host when should not"

	# Send from host1 with the test MAC as source so the bridge learns it.
	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_err $? "Did not find FDB record when should"

	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	# The counter is on `host1_if`, so this is about the first host.
	tc -j -s filter show dev $host1_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	check_err $? "Packet did not reach first host when should"

	# Wait for 10 seconds after the ageing time to make sure FDB
	# record was aged-out.
	ageing_time=$(bridge_ageing_time_get $bridge)
	sleep $((ageing_time + 10))

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# With learning off, the same packet must not create an entry.
	bridge link set dev $br_port1 learning off

	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
	check_fail $? "Found FDB record when should not"

	bridge link set dev $br_port1 learning on

	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host1_if ingress
	ip link set $host1_if promisc off

	bridge link set dev $br_port1 flood on

	log_test "FDB learning"
}
1307
# Send one packet from host1 and check whether it arrived at host2.
#   $1 - "true" if the packet is expected to be flooded, "false" if not
#   $2 - destination MAC
#   $3 - destination IP
#   $4 - sending host interface
#   $5 - receiving host interface
# Returns 1 when the observation contradicts the expectation, 0 otherwise.
flood_test_do()
{
	local should_flood=$1
	local mac=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local err=0
	local counted

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was flooded to it or not.
	ip link set $host2_if promisc on
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
	sleep 1

	# jq succeeds only if the filter counted exactly one packet.
	tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	counted=$?

	if ((counted != 0)); then
		[[ $should_flood == "true" ]] && err=1
	else
		[[ $should_flood == "false" ]] && err=1
	fi

	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress
	ip link set $host2_if promisc off

	return $err
}
1341
# Test case: unknown unicast flooding follows the "flood" flag of a bridge
# port. Checks first with the flag off, then with it on, and logs the result
# as "Unknown unicast flood".
#   $1 - bridge port whose flag is toggled
#   $2 - sending host interface
#   $3 - receiving host interface
flood_unicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local dmac=de:ad:be:ef:13:37
	local dip=192.0.2.100

	RET=0

	bridge link set dev $br_port flood off
	flood_test_do false $dmac $dip $host1_if $host2_if
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port flood on
	flood_test_do true $dmac $dip $host1_if $host2_if
	check_err $? "Packet was not flooded when should"

	log_test "Unknown unicast flood"
}
1364
# Test case: unregistered multicast flooding follows the "mcast_flood" flag
# of a bridge port. Checks first with the flag off, then with it on, and logs
# the result as "Unregistered multicast flood".
#   $1 - bridge port whose flag is toggled
#   $2 - sending host interface
#   $3 - receiving host interface
flood_multicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local dmac=01:00:5e:00:00:01
	local dip=239.0.0.1

	RET=0

	bridge link set dev $br_port mcast_flood off
	flood_test_do false $dmac $dip $host1_if $host2_if
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port mcast_flood on
	flood_test_do true $dmac $dip $host1_if $host2_if
	check_err $? "Packet was not flooded when should"

	log_test "Unregistered multicast flood"
}
1387
# Run the unicast and the multicast flood tests for a bridge port.
#   $1 - bridge port; connected to `host2_if`
#   $2 - sending host interface
#   $3 - receiving host interface
flood_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local flood_case

	for flood_case in flood_unicast_test flood_multicast_test; do
		$flood_case $br_port $host1_if $host2_if
	done
}
1398
# Start $MZ in the background sending a continuous stream (count 0), then
# wait one second.
#   $1    - packet size
#   $2    - protocol handed to the -t option
#   $3    - interface where the traffic egresses the host
#   $4    - source IP
#   $5    - destination IP
#   $6    - destination MAC
#   rest  - extra arguments appended to the $MZ command line
__start_traffic()
{
	local size=$1; shift
	local l4_proto=$1; shift
	local egress_if=$1; shift
	local src_ip=$1; shift
	local dst_ip=$1; shift
	local dst_mac=$1; shift

	$MZ $egress_if -p $size -A $src_ip -B $dst_ip -c 0 \
		-a own -b $dst_mac -t "$l4_proto" -q "$@" &
	sleep 1
}
1412
# Start background UDP traffic with the given packet size.
#   $1    - packet size
#   rest  - passed on to __start_traffic
start_traffic_pktsize()
{
	local size=$1; shift

	__start_traffic $size udp "$@"
}
1419
# Start background TCP traffic with the given packet size.
#   $1    - packet size
#   rest  - passed on to __start_traffic
start_tcp_traffic_pktsize()
{
	local size=$1; shift

	__start_traffic $size tcp "$@"
}
1426
# Start background UDP traffic with a packet size of 8000.
start_traffic()
{
	local size=8000

	start_traffic_pktsize $size "$@"
}
1431
# Start background TCP traffic with a packet size of 8000.
start_tcp_traffic()
{
	local size=8000

	start_tcp_traffic_pktsize $size "$@"
}
1436
# Kill the current background job (%%) and wait for it to finish.
stop_traffic()
{
	# Killing mausezahn is noisy, so stderr is discarded.
	{
		kill %% && wait %%
	} 2>/dev/null
}
1442
# Per-interface capture state: tcpdump PID, capture file, tcpdump output file.
declare -A cappid
declare -A capfile
declare -A capout

# Start a background tcpdump capturing inbound packets on an interface into
# a temporary file, then wait one second.
#   $1 - interface name
#   $2 - optional network namespace to run tcpdump in
# Sets the globals ns_cmd (read again by tcpdump_stop) and capuser.
tcpdump_start()
{
	local if_name=$1; shift
	local ns=$1; shift

	capfile[$if_name]=$(mktemp)
	capout[$if_name]=$(mktemp)

	ns_cmd=""
	if [[ -n $ns ]]; then
		ns_cmd="ip netns exec ${ns}"
	fi

	# When SUDO_USER is set, it is handed to tcpdump through -Z.
	capuser=""
	if [[ -n $SUDO_USER ]]; then
		capuser="-Z $SUDO_USER"
	fi

	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
		-s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
		> "${capout[$if_name]}" 2>&1 &
	cappid[$if_name]=$!

	sleep 1
}
1474
1475tcpdump_stop()
1476{
1477	local if_name=$1
1478	local pid=${cappid[$if_name]}
1479
1480	$ns_cmd kill "$pid" && wait "$pid"
1481	sleep 1
1482}
1483
# Delete the capture file and the tcpdump output file recorded for
# interface $1 in `capfile` and `capout`.
tcpdump_cleanup()
{
	local if_name=$1
	local pcap_path=${capfile[$if_name]}
	local output_path=${capout[$if_name]}

	rm $pcap_path $output_path
}
1490
# Print the capture recorded for interface $1 by reading its file from
# `capfile` with tcpdump (-e -n); tcpdump's stderr is merged into stdout.
tcpdump_show()
{
	local if_name=$1
	local pcap_path=${capfile[$if_name]}

	tcpdump -e -n -r $pcap_path 2>&1
}
1497
# return 0 if the packet wasn't seen on host2_if or 1 if it was
#
# $1 - destination MAC of the test packet
# $2 - source IP
# $3 - destination IP; also selects IPv4 vs. IPv6 handling
# $4 - host1_if, the interface $MZ sends the packet from
# $5 - host2_if, the interface where reception is checked
mcast_packet_test()
{
	local mac=$1 src_ip=$2 ip=$3
	local host1_if=$4 host2_if=$5
	local ipv4_re='^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$'
	local tc_proto mz_v6arg
	local seen=0

	# Basic check only: anything that does not look like a dotted quad is
	# assumed to be an IPv6 address.
	if [[ $ip =~ $ipv4_re ]]; then
		tc_proto="ip"
		mz_v6arg=""
	else
		tc_proto="ipv6"
		mz_v6arg="-6"
	fi

	# Install a dropping filter on `host2_if`; its packet counter tells us
	# whether the packet made it there.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
		flower ip_proto udp dst_mac $mac action drop

	# Send exactly one UDP packet and give it time to arrive.
	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
	sleep 1

	# The packet counts as seen only if the filter matched exactly once.
	if tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	then
		seen=1
	fi

	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $seen
}
1537
# Check the bridge MDB on br0 for $TEST_GROUP after a membership report.
#
# $1 - report name, used only in the failure message
# $2... - source addresses
#
# Two things are reported through check_err: that an entry for the group
# exists whose source list matches every given source, and that an entry
# with `.src` set exists for each individual source.
brmcast_check_sg_entries()
{
	local report=$1; shift
	local slist=("$@")
	local sarg=""
	# Keep the loop variables local so that callers' (or global) `src` and
	# `sgent` are not overwritten.
	local src sgent

	# Build one extra jq condition per expected source.
	for src in "${slist[@]}"; do
		sarg="${sarg} and .source_list[].address == \"$src\""
	done
	bridge -j -d -s mdb show dev br0 \
		| jq -e ".[].mdb[] | \
			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
	check_err $? "Wrong *,G entry source list after $report report"

	for sgent in "${slist[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
	done
}
1559
# Send one test packet per source to $TEST_GROUP from $h2 and check on $h1
# whether it was forwarded.
#
# $1 - 1 if traffic from the sources is expected to be forwarded,
#      anything else if it is expected to be blocked
# $2... - source addresses
brmcast_check_sg_fwding()
{
	local should_fwd=$1; shift
	local sources=("$@")
	# Keep the loop variable local so that callers' (or global) `src` is
	# not overwritten.
	local src
	local retval

	for src in "${sources[@]}"; do
		retval=0

		# mcast_packet_test returns 1 when the packet was seen.
		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
		retval=$?
		if [ $should_fwd -eq 1 ]; then
			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
		else
			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
		fi
	done
}
1577
# Check the blocked/not-blocked state of sources of $TEST_GROUP in the
# bridge MDB on br0.
#
# $1 - 1 if the sources are expected to be blocked, anything else otherwise
# $2... - source addresses
#
# For each source, two lookups are made: a source-list entry with a
# "0.00" timer, and an S,G entry carrying the "blocked" flag. Each jq
# status goes to check_err_fail together with a flag that is 0 when the
# sources are expected to be blocked and 1 otherwise.
brmcast_check_sg_state()
{
	local is_blocked=$1; shift
	local sources=("$@")
	local should_fail=1
	# Keep the loop variable local so that callers' (or global) `src` is
	# not overwritten.
	local src

	if [ $is_blocked -eq 1 ]; then
		should_fail=0
	fi

	for src in "${sources[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
				 .source_list[] |
				 select(.address == \"$src\") |
				 select(.timer == \"0.00\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has zero timer"

		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
				 .flags[] == \"blocked\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has blocked flag"
	done
}
1604
# Join a multicast group on an interface by running `mreceive` in the
# background, inside the VRF reported by master_name_get.
#
# $1 - interface name
# $2 - multicast group
#
# The PID of the background job is left in the global `mreceive_pid`.
mc_join()
{
	local if_name=$1 group=$2
	local vrf_name=$(master_name_get $if_name)

	# Actual reception does not matter here; the point is to join the
	# IP multicast group and get the L2 address added to the device's
	# MAC filtering table.
	ip vrf exec $vrf_name mreceive -g $group -I $if_name \
		> /dev/null 2>&1 &
	mreceive_pid=$!

	sleep 1
}
1620
# Terminate the background job recorded in `mreceive_pid` and reap it.
# Returns the status of `kill` if it fails, otherwise that of `wait`.
mc_leave()
{
	kill "$mreceive_pid" || return
	wait "$mreceive_pid"
}
1625
# Run `msend` once (-c 1) for the given group(s) on an interface, inside
# the VRF reported by master_name_get. All msend output is discarded.
#
# $1 - interface name
# $2 - value passed to msend's -g option
mc_send()
{
	local if_name=$1 groups=$2
	local vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name msend -g $groups -I $if_name -c 1 \
		> /dev/null 2>&1
}
1635
# Start `<ip> monitor <type>` in the background, writing to a fresh
# temporary file.
#
# $1 - monitor type handed to `monitor`
# $2 - optional command to use in place of `ip`
#
# Prints "<pid> <tmpfile>" on stdout. Also sets `tmpfile` and `mpid`,
# neither of which is local.
start_ip_monitor()
{
	local mtype=$1
	local ip=${2-ip}

	tmpfile=$(mktemp /var/run/nexthoptestXXX)
	# The monitor is backgrounded inside a subshell that prints its PID.
	mpid=$( ($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null )
	sleep 0.2
	echo "$mpid $tmpfile"
}
1647
# Stop a monitor started by start_ip_monitor and compare the number of
# events it recorded with the expected count.
#
# $1 - PID of the monitor
# $2 - file holding the monitor output; removed before returning
# $3 - expected number of events
# $4 - description used as the prefix of the failure message
#
# Only lines starting with a word character are counted. The comparison
# result is reported through check_err.
stop_ip_monitor()
{
	local mpid=$1 tmpfile=$2
	local el=$3 what=$4

	sleep 0.2
	kill $mpid

	local lines=$(grep '^\w' $tmpfile | wc -l)

	[ $lines -eq $el ]
	check_err $? "$what: $lines lines of events, expected $el"
	rm -rf $tmpfile
}
1662
# Check that exactly one `stats` monitor notification is emitted for
# each of four steps: enabling <type>_stats, running the "make suitable"
# command, running the "make unsuitable" command, and disabling
# <type>_stats.
#
# $1 - device
# $2 - stats type; "<type>_stats" is what gets toggled
# $3 - command run for the "installation" step
# $4 - command run for the "deinstallation" step
# $5 - optional command to use in place of `ip`
#
# Resets RET and logs the outcome as "<type>_stats notifications".
hw_stats_monitor_test()
{
	local dev=$1 type=$2
	local make_suitable=$3 make_unsuitable=$4
	local ip=${5-ip}
	local stats_name=${type}_stats
	local ipmout

	RET=0

	# Expect a notification about enablement.
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev $stats_name on
	stop_ip_monitor $ipmout 1 "$stats_name enablement"

	# Expect a notification about offload.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_suitable
	stop_ip_monitor $ipmout 1 "$stats_name installation"

	# Expect a notification about loss of offload.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_unsuitable
	stop_ip_monitor $ipmout 1 "$stats_name deinstallation"

	# Expect a notification about disablement
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev $stats_name off
	stop_ip_monitor $ipmout 1 "$stats_name disablement"

	log_test "$stats_name notifications"
}
1695