1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4. "$(dirname "${0}")/mptcp_lib.sh"
5
# Wall-clock start of the run; display_time() reports the elapsed seconds.
time_start=$(date +%s)

# Option string handed to getopts by the command-line parser.
optstring="S:R:d:e:l:r:h4cm:f:tC"

# Overall script result and the exit code used to report a skipped test.
ret=0
ksft_skip=4

# Input/output files of the server (s*) and the client (c*); the real
# paths are assigned once the temporary files have been created.
sin=""
sout=""
cin=""
cout=""
cin_disconnect=""

# timeout_poll is handed to "mptcp_connect -t", timeout_test to timeout(1).
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))

# Defaults for everything the command-line options can override.
capture=false
ipv6=true
ethtool_random_on=true
checksum=false
options_log=true
testmode=""
sndbuf=0
rcvbuf=0
do_tcp=0
filesize=0
connect_per_transfer=1

# Random netem settings: a delay in [0, 49] ms and a raw loss value in
# [0, 100] that is turned into a netem percentage string just below.
tc_delay="$((RANDOM%50))"
tc_loss=$((RANDOM%101))

# 100 -> "1%", NN -> "0.NN%", N -> "0.0N%", 0 -> no loss at all.
case "$tc_loss" in
100)
	tc_loss=1%
	;;
[1-9][0-9])
	tc_loss=0.$tc_loss%
	;;
[1-9])
	tc_loss=0.0$tc_loss%
	;;
*)
	tc_loss=""
	;;
esac
41
# Print the command-line help on stdout, one line per supported option.
# Takes no arguments; any argument passed by a caller is ignored.
usage() {
	echo "Usage: $0 [ -a ]"
	echo -e "\t-d: tc/netem delay in milliseconds, e.g. \"-d 10\" (default random)"
	echo -e "\t-l: tc/netem loss percentage, e.g. \"-l 0.02\" (default random)"
	# The inner quotes must be escaped: unescaped, they end and re-open the
	# string and the quotes around "-r 0" vanish from the printed help.
	echo -e "\t-r: tc/netem reorder mode, e.g. \"-r 25% 50% gap 5\", use \"-r 0\" to disable reordering (default random)"
	echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
	echo -e "\t-f: size of file to transfer in bytes (default random)"
	echo -e "\t-S: set sndbuf value (default: use kernel default)"
	echo -e "\t-R: set rcvbuf value (default: use kernel default)"
	echo -e "\t-m: test mode (poll, sendfile; default: poll)"
	echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
	echo -e "\t-C: enable the MPTCP data checksum"
}
57
# Parse the command line; every option overrides one of the defaults set at
# the top of the script. -h prints the help and exits with 0, an unknown
# option prints the help and exits with 1.
while getopts "$optstring" option;do
	case "$option" in
	"h")
		usage
		exit 0
		;;
	"d")
		# "[ x -ge 0 ]" fails for anything that is not an integer; the
		# operand is quoted so that empty, multi-word or glob-like
		# arguments are rejected by the message below rather than being
		# expanded by the shell, and the builtin's own noise is dropped.
		if [ "$OPTARG" -ge 0 ] 2>/dev/null;then
			tc_delay="$OPTARG"
		else
			echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2
			exit 1
		fi
		;;
	"e")
		# may be given several times: each feature is switched off
		ethtool_args="$ethtool_args $OPTARG off"
		ethtool_random_on=false
		;;
	"l")
		tc_loss="$OPTARG"
		;;
	"r")
		tc_reorder="$OPTARG"
		;;
	"4")
		ipv6=false
		;;
	"c")
		capture=true
		;;
	"S")
		if [ "$OPTARG" -ge 0 ] 2>/dev/null;then
			sndbuf="$OPTARG"
		else
			echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
			exit 1
		fi
		;;
	"R")
		if [ "$OPTARG" -ge 0 ] 2>/dev/null;then
			rcvbuf="$OPTARG"
		else
			echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
			exit 1
		fi
		;;
	"m")
		testmode="$OPTARG"
		;;
	"f")
		filesize="$OPTARG"
		;;
	"t")
		# counted: given twice, TCP <-> TCP transfers are run as well
		do_tcp=$((do_tcp+1))
		;;
	"C")
		checksum=true
		;;
	"?")
		usage
		exit 1
		;;
	esac
done
122
# Per-run suffix (hex timestamp + random string) appended to every netns
# name, so that the namespaces of this run get unique names.
sec=$(date +%s)
rndh="$(printf '%x' "${sec}")-$(mktemp -u XXXXXX)"

ns1="ns1-${rndh}"
ns2="ns2-${rndh}"
ns3="ns3-${rndh}"
ns4="ns4-${rndh}"

# Number of transfers started so far; do_transfer() derives the port from it.
TEST_COUNT=0
131
# EXIT handler: remove every temporary file of the run, then delete the four
# namespaces together with their per-namespace nstat files in /tmp.
cleanup()
{
	local ns

	rm -f "$cin_disconnect" "$cout_disconnect" \
	      "$cin" "$cout" \
	      "$sin" "$sout" \
	      "$capout"

	for ns in "$ns1" "$ns2" "$ns3" "$ns4"; do
		ip netns del $ns
		rm -f /tmp/$ns.nstat /tmp/$ns.out
	done
}
145
# Prerequisite checks provided by mptcp_lib.sh (presumably they end the
# script themselves when MPTCP or kallsyms is unavailable -- see the library).
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms

# Nothing below can work without the ip tool: report the test as skipped.
if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi
154
# Temporary files: server/client input and output, plus the capture log.
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)

# Names used by the disconnect tests; listed here so cleanup() removes them.
cin_disconnect="${cin}.disconnect"
cout_disconnect="${cout}.disconnect"

# From here on, files and namespaces are removed whenever the script exits.
trap cleanup EXIT

# Create the four namespaces and bring their loopback device up; the test
# is skipped when a namespace cannot be created.
for i in "$ns1" "$ns2" "$ns3" "$ns4"; do
	if ! ip netns add $i; then
		exit $ksft_skip
	fi
	ip -net $i link set lo up
done
168
# Topology: four namespaces chained by three veth pairs, ns2 and ns3 route.
#
#    ns1               ns2                    ns3                     ns4
# ns1eth2    ns2eth1   ns2eth3      ns3eth2   ns3eth4       ns4eth3
#                           - drop 1% ->            reorder 25%
#                           <- TSO off -

ip link add ns1eth2 netns "${ns1}" type veth peer name ns2eth1 netns "${ns2}"
ip link add ns2eth3 netns "${ns2}" type veth peer name ns3eth2 netns "${ns3}"
ip link add ns3eth4 netns "${ns3}" type veth peer name ns4eth3 netns "${ns4}"

# ns1: end host, everything goes through ns2
ip -net "${ns1}" addr add 10.0.1.1/24 dev ns1eth2
ip -net "${ns1}" addr add dead:beef:1::1/64 dev ns1eth2 nodad

ip -net "${ns1}" link set ns1eth2 up
ip -net "${ns1}" route add default via 10.0.1.2
ip -net "${ns1}" route add default via dead:beef:1::2

# ns2: router between ns1 and ns3
ip -net "${ns2}" addr add 10.0.1.2/24 dev ns2eth1
ip -net "${ns2}" addr add dead:beef:1::2/64 dev ns2eth1 nodad
ip -net "${ns2}" link set ns2eth1 up

ip -net "${ns2}" addr add 10.0.2.1/24 dev ns2eth3
ip -net "${ns2}" addr add dead:beef:2::1/64 dev ns2eth3 nodad
ip -net "${ns2}" link set ns2eth3 up
ip -net "${ns2}" route add default via 10.0.2.2
ip -net "${ns2}" route add default via dead:beef:2::2
ip netns exec "${ns2}" sysctl -q net.ipv4.ip_forward=1
ip netns exec "${ns2}" sysctl -q net.ipv6.conf.all.forwarding=1

# ns3: router between ns2 and ns4
ip -net "${ns3}" addr add 10.0.2.2/24 dev ns3eth2
ip -net "${ns3}" addr add dead:beef:2::2/64 dev ns3eth2 nodad
ip -net "${ns3}" link set ns3eth2 up

ip -net "${ns3}" addr add 10.0.3.2/24 dev ns3eth4
ip -net "${ns3}" addr add dead:beef:3::2/64 dev ns3eth4 nodad
ip -net "${ns3}" link set ns3eth4 up
ip -net "${ns3}" route add default via 10.0.2.1
ip -net "${ns3}" route add default via dead:beef:2::1
ip netns exec "${ns3}" sysctl -q net.ipv4.ip_forward=1
ip netns exec "${ns3}" sysctl -q net.ipv6.conf.all.forwarding=1

# ns4: end host, everything goes through ns3
ip -net "${ns4}" addr add 10.0.3.1/24 dev ns4eth3
ip -net "${ns4}" addr add dead:beef:3::1/64 dev ns4eth3 nodad
ip -net "${ns4}" link set ns4eth3 up
ip -net "${ns4}" route add default via 10.0.3.2
ip -net "${ns4}" route add default via dead:beef:3::2

# -C: turn the MPTCP data checksum on in every namespace
if $checksum; then
	for i in "${ns1}" "${ns2}" "${ns3}" "${ns4}"; do
		ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
	done
fi
220
# $1: ns, $2: device, $3: "ethtool -K" arguments (e.g. "tso off gso off")
#
# Apply the given offload settings; on success log what has been set.
# Returns 1, silently, when ethtool fails.
set_ethtool_flags() {
	local netns="$1"
	local iface="$2"
	local features="$3"

	# $features is left unquoted on purpose: it holds several words
	if ! ip netns exec $netns ethtool -K $iface $features 2>/dev/null; then
		return 1
	fi

	echo "INFO: set $netns dev $iface: ethtool -K $features"
}
229
# $1: ns, $2: device
#
# Switch off a random subset of tso/gso/gro on the device: the three lowest
# bits of one random number select the features. Nothing is done when none
# of them has been picked.
set_random_ethtool_flags() {
	local r=$RANDOM
	local flags=""

	if (( r & 1 )); then
		flags="tso off"
	fi
	if (( r & 2 )); then
		flags="$flags gso off"
	fi
	if (( r & 4 )); then
		flags="$flags gro off"
	fi

	if [ -z "$flags" ]; then
		return 0
	fi

	set_ethtool_flags "$1" "$2" "$flags"
}
246
# Offload settings of the receiving ends of the ns2<->ns3 and ns3<->ns4
# links: the features given with -e when there are some, a random set
# otherwise.
if ! $ethtool_random_on; then
	set_ethtool_flags "$ns3" ns3eth2 "$ethtool_args"
	set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
else
	set_random_ethtool_flags "$ns3" ns3eth2
	set_random_ethtool_flags "$ns4" ns4eth3
fi
254
# $1: file
#
# Debug helper for mismatching transfers: list the file on stderr, then
# print its last 27 bytes on stdout.
print_file_err()
{
	local path="$1"

	ls -l "${path}" 1>&2
	echo "Trailing bytes are: "
	tail -c 27 "${path}"
}
261
# $1: file that has been sent, $2: file that has been received,
# $3: description used in the error message
#
# Returns 0 when both files are identical. Otherwise, prints a failure line
# followed by details about both files, and returns 1.
check_transfer()
{
	local sent=$1
	local received=$2
	local desc=$3

	if cmp "$sent" "$received" > /dev/null 2>&1; then
		return 0
	fi

	echo "[ FAIL ] $desc does not match (in, out):"
	print_file_err "$sent"
	print_file_err "$received"

	return 1
}
279
# Check, in a dedicated netns, that MPTCP is enabled by default and that new
# MPTCP sockets can be refused by setting net.mptcp.enabled=0.
#
# The script exits with $ksft_skip when the netns cannot be created. On
# failure, the global "ret" is set to 1 and 1 is returned. The temporary
# netns is deleted on every return path: cleanup() does not know about it.
check_mptcp_disabled()
{
	local disabled_ns="ns_disabled-$rndh"
	local enabled
	local err=0

	ip netns add "${disabled_ns}" || exit $ksft_skip

	# net.mptcp.enabled should be enabled by default
	enabled=$(ip netns exec "${disabled_ns}" sysctl net.mptcp.enabled | awk '{ print $3 }')
	if [ "${enabled}" -ne 1 ]; then
		echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
		ip netns delete "${disabled_ns}"
		ret=1
		return 1
	fi
	ip netns exec "${disabled_ns}" sysctl -q net.mptcp.enabled=0

	# With MPTCP disabled, creating the socket has to fail; LC_ALL=C keeps
	# the error message in the form grep is looking for.
	LC_ALL=C ip netns exec "${disabled_ns}" ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
		grep -q "^socket: Protocol not available$" && err=1
	ip netns delete "${disabled_ns}"

	if [ ${err} -eq 0 ]; then
		echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
		ret=1
		return 1
	fi

	echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
	return 0
}
307
# $1: IP address
#
# Returns 0 when the address contains a ':' (IPv6), 1 otherwise. An empty
# argument is not an IPv6 address.
is_v6()
{
	case "$1" in
	*:*)
		return 0
		;;
	*)
		return 1
		;;
	esac
}
313
# $1: netns named in the error message, $2: netns the ping is sent from,
# $3: address to ping
#
# Send one ping. IPv6 addresses are silently skipped when IPv6 tests are
# disabled. On failure, the global "ret" is set to 1 and 1 is returned.
do_ping()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local connect_addr="$3"
	local opts="-q -c 1"

	if is_v6 "${connect_addr}"; then
		if ! $ipv6; then
			return 0
		fi
		opts="${opts} -6"
	fi

	if ip netns exec ${connector_ns} ping ${opts} $connect_addr >/dev/null; then
		return 0
	fi

	echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
	ret=1

	return 1
}
336
# $1: ns, $2: MIB counter
#
# Print the value (second column) of each line nstat reports for the given
# counter in the given netns.
get_mib_counter()
{
	local netns="${1}"
	local counter="${2}"

	ip netns exec "${netns}" nstat -z -a "${counter}" | {
		local header name value rate rest

		# strip the header
		read -r header
		while read name value rate rest; do
			echo $value
		done
	}
}
351
# $1: ns, $2: port
#
# Wait until a socket in the listening state (0A) is bound to the port in
# the given netns, checking /proc/net/tcp* up to 10 times with 0.1s in
# between. Gives up silently after the last attempt.
wait_local_port_listen()
{
	local netns="${1}"
	local port="${2}"

	local hex_port attempt

	printf -v hex_port '%04X' "${port}"
	for attempt in {1..10}; do
		# column 2: local address:port in hex, column 4: socket state
		if ip netns exec "${netns}" cat /proc/net/tcp* | \
		   awk -v suffix=":${hex_port}\$" \
			'BEGIN {rc=1} {if ($2 ~ suffix && $4 ~ /0A/) {rc=0; exit}} END {exit rc}'; then
			break
		fi
		sleep 0.1
	done
}
368
# Run one transfer between a listener and a connector, then validate it.
#
# $1: listener netns, $2: connector netns
# $3: client protocol, $4: server protocol (both handed to "mptcp_connect -s")
# $5: address the client connects to, $6: address given to the listener
# $7: extra arguments passed to both mptcp_connect instances
#
# The server sends "$sin" and stores what it gets in "$sout"; the client
# sends "$cin" and stores what it gets in "$cout". The port is
# 10000 + TEST_COUNT, and TEST_COUNT is incremented by each call.
#
# Returns 0 when both programs exited with 0, both files have been received
# unmodified and the MPTCP counters are the expected ones; non-zero otherwise.
do_transfer()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local cl_proto="$3"
	local srv_proto="$4"
	local connect_addr="$5"
	local local_addr="$6"
	local extra_args="$7"

	local port
	port=$((10000+TEST_COUNT))
	TEST_COUNT=$((TEST_COUNT+1))

	if [ "$rcvbuf" -gt 0 ]; then
		extra_args="$extra_args -R $rcvbuf"
	fi

	if [ "$sndbuf" -gt 0 ]; then
		extra_args="$extra_args -S $sndbuf"
	fi

	if [ -n "$testmode" ]; then
		extra_args="$extra_args -m $testmode"
	fi

	# the extra options are only logged once, by the very first transfer
	if [ -n "$extra_args" ] && $options_log; then
		echo "INFO: extra options: $extra_args"
	fi
	options_log=false

	:> "$cout"
	:> "$sout"
	:> "$capout"

	local addr_port
	addr_port=$(printf "%s:%d" "${connect_addr}" "${port}")
	printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" "${connector_ns}" "${cl_proto}" "${listener_ns}" "${addr_port}" "${srv_proto}"

	if $capture; then
		local capuser
		if [ -z "$SUDO_USER" ] ; then
			capuser=""
		else
			capuser="-Z $SUDO_USER"
		fi

		local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
		local capopt="-i any -s 65535 -B 32768 ${capuser}"

		# ${capopt} holds several options: it has to be split
		ip netns exec "${listener_ns}"  tcpdump ${capopt} -w "${capfile}-listener.pcap"  >> "${capout}" 2>&1 &
		local cappid_listener=$!

		ip netns exec "${connector_ns}" tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
		local cappid_connector=$!

		# give tcpdump some time to start
		sleep 1
	fi

	# new nstat baseline: the reports done after the transfer are relative to it
	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec "${listener_ns}" \
		nstat -n
	if [ "${listener_ns}" != "${connector_ns}" ]; then
		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec "${connector_ns}" \
			nstat -n
	fi

	# counters before the transfer
	local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
	local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
	local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
	local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
	local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
	local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")

	# $extra_args and $local_addr are left unquoted on purpose: the former
	# holds several words
	timeout ${timeout_test} \
		ip netns exec "${listener_ns}" \
			./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
				$extra_args $local_addr < "$sin" > "$sout" &
	local spid=$!

	wait_local_port_listen "${listener_ns}" "${port}"

	local start
	start=$(date +%s%3N)
	timeout ${timeout_test} \
		ip netns exec "${connector_ns}" \
			./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
				$extra_args $connect_addr < "$cin" > "$cout" &
	local cpid=$!

	wait $cpid
	local retc=$?
	wait $spid
	local rets=$?

	local stop
	stop=$(date +%s%3N)

	if $capture; then
		sleep 1
		kill ${cappid_listener}
		kill ${cappid_connector}
	fi

	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec "${listener_ns}" \
		nstat | grep Tcp > /tmp/${listener_ns}.out
	if [ "${listener_ns}" != "${connector_ns}" ]; then
		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec "${connector_ns}" \
			nstat | grep Tcp > /tmp/${connector_ns}.out
	fi

	local duration
	duration=$((stop-start))
	printf "(duration %05sms) " "${duration}"
	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
		echo "[ FAIL ] client exit code $retc, server $rets" 1>&2
		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
		ip netns exec "${listener_ns}" ss -Menita 1>&2 -o "sport = :$port"
		cat /tmp/${listener_ns}.out
		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
		ip netns exec "${connector_ns}" ss -Menita 1>&2 -o "dport = :$port"
		[ "${listener_ns}" != "${connector_ns}" ] && cat /tmp/${connector_ns}.out

		echo
		cat "$capout"
		return 1
	fi

	check_transfer "$sin" "$cout" "file received by client"
	retc=$?
	check_transfer "$cin" "$sout" "file received by server"
	rets=$?

	# counters after the transfer
	local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
	local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
	local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
	local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
	local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")

	# the MPC counters are only expected to move when both ends use MPTCP
	local expect_synrx=$((stat_synrx_last_l))
	local expect_ackrx=$((stat_ackrx_last_l))

	local cookies
	cookies=$(ip netns exec "${listener_ns}" sysctl net.ipv4.tcp_syncookies)
	cookies=${cookies##*=}

	if [ "${cl_proto}" = "MPTCP" ] && [ "${srv_proto}" = "MPTCP" ]; then
		expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
		expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
	fi

	if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
		printf "[ FAIL ] lower MPC SYN rx (%d) than expected (%d)\n" \
			"${stat_synrx_now_l}" "${expect_synrx}" 1>&2
		retc=1
	fi
	# Too few MPC ACKs is only a failure when no out-of-order packet has
	# been queued: otherwise it is reported as a fallback. The OoO counter
	# must not be part of the outer condition, or the note is unreachable.
	if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
		if [ ${stat_ooo_now} -eq 0 ]; then
			printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
				"${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
			rets=1
		else
			printf "[ Note ] fallback due to TCP OoO"
		fi
	fi

	if $checksum; then
		local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
		local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")

		local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
		if [ $csum_err_s_nr -gt 0 ]; then
			printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
			rets=1
		fi

		local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
		if [ $csum_err_c_nr -gt 0 ]; then
			printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
			retc=1
		fi
	fi

	if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
		printf "[ OK ]"
	fi

	# tcp_syncookies=2: cookies are expected for each connection, the
	# counters have to move; otherwise, they are not supposed to change.
	if [ $cookies -eq 2 ];then
		if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
			printf " WARN: CookieSent: did not advance"
		fi
		if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
			printf " WARN: CookieRecv: did not advance"
		fi
	else
		if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
			printf " WARN: CookieSent: changed"
		fi
		if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
			printf " WARN: CookieRecv: changed"
		fi
	fi

	if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
		printf " WARN: SYNRX: expect %d, got %d (probably retransmissions)" \
			"${expect_synrx}" "${stat_synrx_now_l}"
	fi
	if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
		printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \
			"${expect_ackrx}" "${stat_ackrx_now_l}"
	fi

	echo
	cat "$capout"
	[ $retc -eq 0 ] && [ $rets -eq 0 ]
}
583
# $1: file to create, $2: who sends this file (only used in the message)
#
# Fill the file with $filesize random bytes -- or with a random amount of
# them when $filesize is 0 -- followed by an end marker line.
make_file()
{
	local name=$1
	local who=$2
	local SIZE=$filesize
	local ksize
	local rem

	if [ "$SIZE" -eq 0 ]; then
		local MAXSIZE=$((1024 * 1024 * 8))
		local MINSIZE=$((1024 * 256))

		# NOTE(review): because of the modulo, the result can be below
		# MINSIZE -- confirm whether a lower bound is really intended.
		SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE))
	fi

	ksize=$((SIZE / 1024))
	rem=$((SIZE - (ksize * 1024)))

	dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
	# The remaining bytes have to be appended: writing them with
	# "of=... conv=notrunc" would overwrite the beginning of the file and
	# the size would be rounded down to a multiple of 1024.
	dd if=/dev/urandom bs=1 count=$rem 2> /dev/null >> "$name"
	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"

	echo "Created $name (size $(du -b "$name")) containing data sent by $who"
}
608
# $1: listener netns, $2: connector netns, $3: address to connect to,
# $4: 0 to skip the test when both ends are in the same netns,
# $5: extra arguments for mptcp_connect
#
# Run the MPTCP <-> MPTCP transfer, then the ones involving plain TCP when
# they have been asked for (-t) or when both ends are in the same netns.
# Stops at the first failing transfer: "ret" is then set and 1 is returned.
run_tests_lo()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local connect_addr="$3"
	local loopback="$4"
	local extra_args="$5"
	local lret=0
	local local_addr="0.0.0.0"
	local protos="MPTCP:MPTCP"
	local pair

	# skip if test programs are running inside same netns for subsequent runs.
	if [ $loopback -eq 0 ] && [ ${listener_ns} = ${connector_ns} ]; then
		return 0
	fi

	# skip if we don't want v6
	if ! $ipv6 && is_v6 "${connect_addr}"; then
		return 0
	fi

	if is_v6 "${connect_addr}"; then
		local_addr="::"
	fi

	# client:server protocols of each transfer, in the order they are run;
	# don't bother testing fallback tcp except for loopback case.
	if ! { [ $do_tcp -eq 0 ] && [ ${listener_ns} != ${connector_ns} ]; }; then
		protos="${protos} MPTCP:TCP TCP:MPTCP"
		if [ $do_tcp -gt 1 ] ;then
			protos="${protos} TCP:TCP"
		fi
	fi

	for pair in ${protos}; do
		do_transfer ${listener_ns} ${connector_ns} ${pair%%:*} ${pair##*:} \
			    ${connect_addr} ${local_addr} "${extra_args}"
		lret=$?
		if [ $lret -ne 0 ]; then
			ret=$lret
			return 1
		fi
	done

	return 0
}
678
# $1: listener netns, $2: connector netns, $3: address to connect to
#
# Same as run_tests_lo, except that nothing is run when both ends are in
# the same netns.
run_tests()
{
	local loopback=0

	run_tests_lo $1 $2 $3 ${loopback}
}
683
# $1: address to connect to, $2: name of the test, used in the messages
#
# TPROXY test: ns1 connects to an address ns2 does not own, and ns2 diverts
# the connection to a local transparent listener using nft and policy
# routing (fwmark 1 -> table 100). The test is skipped when a required
# feature is missing. On failure, "ret" is set and 1 is returned.
run_test_transparent()
{
	local connect_addr="$1"
	local msg="$2"

	local connector_ns="$ns1"
	local listener_ns="$ns2"
	local lret=0
	local r6flag=""
	local local_addr="0.0.0.0"
	local extra_args="-o TRANSPARENT"

	# skip if we don't want v6
	if ! $ipv6 && is_v6 "${connect_addr}"; then
		return 0
	fi

	# IP(V6)_TRANSPARENT has been added after TOS support which came with
	# the required infrastructure in MPTCP sockopt code. To support TOS, the
	# following function has been exported (T). Not great but better than
	# checking for a specific kernel version.
	if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
		echo "INFO: ${msg} not supported by the kernel: SKIP"
		return
	fi

	if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
flush ruleset
table inet mangle {
	chain divert {
		type filter hook prerouting priority -150;

		meta l4proto tcp socket transparent 1 meta mark set 1 accept
		tcp dport 20000 tproxy to :20000 meta mark set 1 accept
	}
}
EOF
	then
		echo "SKIP: $msg, could not load nft ruleset"
		mptcp_lib_fail_if_expected_feature "nft rules"
		return
	fi

	if is_v6 "${connect_addr}"; then
		local_addr="::"
		r6flag="-6"
	fi

	if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
		ip netns exec "$listener_ns" nft flush ruleset
		echo "SKIP: $msg, ip $r6flag rule failed"
		mptcp_lib_fail_if_expected_feature "ip rule"
		return
	fi

	if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
		ip netns exec "$listener_ns" nft flush ruleset
		ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
		echo "SKIP: $msg, ip route add local $local_addr failed"
		mptcp_lib_fail_if_expected_feature "ip route"
		return
	fi

	echo "INFO: test $msg"

	# do_transfer uses port 10000 + TEST_COUNT: 20000, as in the nft rules
	TEST_COUNT=10000
	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
		    ${connect_addr} ${local_addr} "${extra_args}"
	lret=$?

	# undo the diversion before looking at the result
	ip netns exec "$listener_ns" nft flush ruleset
	ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
	ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100

	if [ $lret -eq 0 ]; then
		echo "PASS: $msg"
		return 0
	fi

	echo "FAIL: $msg, mptcp connection error" 1>&2
	ret=$lret
	return 1
}
771
# $1: peek mode, handed to "mptcp_connect -P"
#
# Run the loopback tests in ns1, over IPv4 then IPv6, with this peek mode.
run_tests_peekmode()
{
	local peekmode="$1"
	local addr

	echo "INFO: with peek mode: ${peekmode}"
	for addr in 10.0.1.1 dead:beef:1::1; do
		run_tests_lo "$ns1" "$ns1" ${addr} 1 "-P ${peekmode}"
	done
}
780
# Run the ns2 -> ns1 tests with the MPTFO option, twice per address family,
# TCP fastopen being enabled in both namespaces for the duration of the
# tests. Skipped when the kernel does not have the mptcp_fastopen_ symbols.
run_tests_mptfo()
{
	local addr round

	if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then
		echo "INFO: TFO not supported by the kernel: SKIP"
		return
	fi

	echo "INFO: with MPTFO start"
	ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
	ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1

	for addr in 10.0.1.1 dead:beef:1::1; do
		for round in 1 2; do
			run_tests_lo "$ns1" "$ns2" ${addr} 0 "-o MPTFO"
		done
	done

	ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
	ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0
	echo "INFO: with MPTFO end"
}
802
# Run the loopback tests in ns1 with "-I 3": both ends send three times the
# client file, which is also given with "-i". The globals sin, cin,
# cin_disconnect and connect_per_transfer are modified for the duration of
# the tests and restored afterwards. Skipped when the kernel does not have
# the mptcp_pm_data_reset symbol.
run_tests_disconnect()
{
	local old_cin=$cin
	local old_sin=$sin
	local tripled="${old_cin}.disconnect"

	if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then
		echo "INFO: Full disconnect not supported: SKIP"
		return
	fi

	cat $old_cin $old_cin $old_cin > "${tripled}"

	# force do_transfer to cope with the multiple transmissions
	sin="${tripled}"
	cin="${tripled}"
	cin_disconnect="${old_cin}"
	connect_per_transfer=3

	echo "INFO: disconnect"
	run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
	run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"

	# restore previous status
	sin=${old_sin}
	cin=${old_cin}
	cin_disconnect="${cin}.disconnect"
	connect_per_transfer=1
}
831
# Print the number of seconds elapsed since the script has been started
# (time_start). The globals time_end and time_run are updated.
display_time()
{
	time_end=$(date +%s)
	time_run=$((time_end - time_start))

	printf 'Time: %s seconds\n' "${time_run}"
}
839
# $1: message to print when a previous step has failed
#
# Does nothing while the global "ret" is 0. Otherwise, prints the message on
# stderr, displays the elapsed time and exits the script with "ret".
stop_if_error()
{
	local reason="$1"

	[ ${ret} -ne 0 ] || return 0

	echo "FAIL: ${reason}" 1>&2
	display_time
	exit ${ret}
}
850
# Data sent by each end
make_file "$cin" "client"
make_file "$sin" "server"

check_mptcp_disabled

stop_if_error "The kernel configuration is not valid for MPTCP"

# Every namespace has to be able to reach every address of the topology.
# Each target is "<netns owning the address> <address>".
echo "INFO: validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4"; do
	for target in \
		"$ns1 10.0.1.1" "$ns1 dead:beef:1::1" \
		"$ns2 10.0.1.2" "$ns2 dead:beef:1::2" \
		"$ns2 10.0.2.1" "$ns2 dead:beef:2::1" \
		"$ns3 10.0.2.2" "$ns3 dead:beef:2::2" \
		"$ns3 10.0.3.2" "$ns3 dead:beef:3::2" \
		"$ns4 10.0.3.1" "$ns4 dead:beef:3::1"; do
		do_ping "${target%% *}" $sender "${target#* }"
	done
done

stop_if_error "Could not even run ping tests"
878
# Packet loss (plus delay) on the ns2 -> ns3 direction; nothing is installed
# on ns2eth3 when there is no loss to apply.
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
echo -n "INFO: Using loss of $tc_loss "
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "

reorder_delay=$((tc_delay / 4))

# Reordering on the ns3 -> ns4 direction: random unless -r has been given.
if [ -z "${tc_reorder}" ]; then
	reorder1=$((RANDOM%10))
	reorder1=$((100 - reorder1))
	reorder2=$((RANDOM%100))

	if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
		tc_reorder="reorder ${reorder1}% ${reorder2}%"
		echo -n "$tc_reorder with delay ${reorder_delay}ms "
	fi
elif [ "$tc_reorder" = "0" ];then
	tc_reorder=""
elif [ "$reorder_delay" -gt 0 ];then
	# reordering requires some delay
	tc_reorder="reorder $tc_reorder"
	echo -n "$tc_reorder with delay ${reorder_delay}ms "
else
	# No delay, no reordering: drop what has been given with -r. Without
	# the "reorder" keyword in front of it, it is not a valid netem option.
	tc_reorder=""
fi

echo "on ns3eth4"

tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
905
# Loopback first: if it does not work, nothing else will.
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
stop_if_error "Could not even run loopback test"

run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
stop_if_error "Could not even run loopback v6 test"

# Then each namespace connects to every address of the topology.
# Each target is "<listener netns> <address>".
for sender in $ns1 $ns2 $ns3 $ns4; do
	# ns1<->ns2 is not subject to reordering/tc delays. Use it to test
	# mptcp syncookie support.
	case "$sender" in
	"$ns1")
		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
		;;
	*)
		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
		;;
	esac

	for target in \
		"$ns1 10.0.1.1" "$ns1 dead:beef:1::1" \
		"$ns2 10.0.1.2" "$ns2 dead:beef:1::2" \
		"$ns2 10.0.2.1" "$ns2 dead:beef:2::1" \
		"$ns3 10.0.2.2" "$ns3 dead:beef:2::2" \
		"$ns3 10.0.3.2" "$ns3 dead:beef:3::2" \
		"$ns4 10.0.3.1" "$ns4 dead:beef:3::1"; do
		run_tests "${target%% *}" $sender "${target#* }"
	done

	stop_if_error "Tests with $sender as a sender have failed"
done

for peekmode in "saveWithPeek" "saveAfterPeek"; do
	run_tests_peekmode "$peekmode"
done
stop_if_error "Tests with peek mode have failed"

# MPTFO (MultiPath TCP Fastopen tests)
run_tests_mptfo
stop_if_error "Tests with MPTFO have failed"

# connect to ns4 ip address, ns2 should intercept/proxy
run_test_transparent 10.0.3.1 "tproxy ipv4"
run_test_transparent dead:beef:3::1 "tproxy ipv6"
stop_if_error "Tests with tproxy have failed"

run_tests_disconnect

display_time
exit $ret
957