1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
# Wall-clock start; the total run time is derived from it at the end.
time_start=$(date +%s)

# getopts specification: a trailing ':' marks options taking an argument.
optstring="S:R:d:e:l:r:h4cm:f:t"

# Overall result, and the exit code used when the test has to be skipped.
ret=0
ksft_skip=4

# Temporary files (created later with mktemp): server/client input/output.
sin=""
sout=""
cin=""
cout=""

# Settings that command-line options may override.
capture=false
timeout=30
ipv6=true
ethtool_random_on=true
tc_reorder=""
testmode=""
sndbuf=0
rcvbuf=0
options_log=true
do_tcp=0
filesize=0

# Random netem defaults: delay in [0, 399] ms, loss drawn from [0, 100].
tc_delay="$((RANDOM%400))"
tc_loss=$((RANDOM%101))

# Turn the raw draw into a netem loss percentage:
#   0 -> no loss, 1..99 -> 0.01% .. 0.99%, 100 -> 1%.
case "$tc_loss" in
0)
	tc_loss=""
	;;
100)
	tc_loss=1%
	;;
[1-9])
	tc_loss=0.0$tc_loss%
	;;
*)
	tc_loss=0.$tc_loss%
	;;
esac
36
# Print the command-line help on stdout.
# The listed options mirror the getopts specification used by this script.
usage() {
	echo "Usage: $0 [ options ]"
	echo -e "\t-h: show this help and exit"
	echo -e "\t-d: tc/netem delay in milliseconds, e.g. \"-d 10\" (default random)"
	echo -e "\t-l: tc/netem loss percentage, e.g. \"-l 0.02\" (default random)"
	# The inner quotes must be escaped, otherwise they terminate the string
	# and are silently dropped from the help text.
	echo -e "\t-r: tc/netem reorder mode, e.g. \"-r 25% 50% gap 5\", use \"-r 0\" to disable reordering (default random)"
	echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
	echo -e "\t-f: size of file to transfer in bytes (default random)"
	echo -e "\t-S: set sndbuf value (default: use kernel default)"
	echo -e "\t-R: set rcvbuf value (default: use kernel default)"
	echo -e "\t-m: test mode (poll, sendfile; default: poll)"
	echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
}
51
# Parse the command line. The numeric options (-d, -S, -R) are validated
# here; all other option arguments are stored as given.
while getopts "$optstring" option;do
	case "$option" in
	"h")
		usage $0
		exit 0
		;;
	"d")
		# $OPTARG is quoted so that a value such as "x -o 1" is compared as
		# one operand instead of being re-parsed by '[' as an expression.
		# The '[' diagnostic is dropped in favour of the message below.
		if [ "$OPTARG" -ge 0 ] 2>/dev/null;then
			tc_delay="$OPTARG"
		else
			echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2
			exit 1
		fi
		;;
	"e")
		# May be given several times; each feature is switched "off".
		ethtool_args="$ethtool_args $OPTARG off"
		ethtool_random_on=false
		;;
	"l")
		tc_loss="$OPTARG"
		;;
	"r")
		tc_reorder="$OPTARG"
		;;
	"4")
		ipv6=false
		;;
	"c")
		capture=true
		;;
	"S")
		if [ "$OPTARG" -ge 0 ] 2>/dev/null;then
			sndbuf="$OPTARG"
		else
			echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
			exit 1
		fi
		;;
	"R")
		if [ "$OPTARG" -ge 0 ] 2>/dev/null;then
			rcvbuf="$OPTARG"
		else
			echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
			exit 1
		fi
		;;
	"m")
		testmode="$OPTARG"
		;;
	"f")
		filesize="$OPTARG"
		;;
	"t")
		# Counted: the number of -t flags selects how much TCP is tested.
		do_tcp=$((do_tcp+1))
		;;
	"?")
		# getopts reports unknown options and missing arguments as '?'.
		usage $0
		exit 1
		;;
	esac
done
113
# Suffix shared by all namespaces of this run: the current time in hex
# followed by a random tag generated by mktemp.
sec=$(date +%s)
printf -v rndh '%x-%s' "$sec" "$(mktemp -u XXXXXX)"

ns1="ns1-${rndh}"
ns2="ns2-${rndh}"
ns3="ns3-${rndh}"
ns4="ns4-${rndh}"

# Number of transfers started so far; do_transfer derives its port from it.
TEST_COUNT=0
122
# EXIT handler: removes the temporary payload/output/capture-log files and
# deletes the four test namespaces.
cleanup()
{
	local tmpfile netns

	for tmpfile in "$cin" "$cout" "$sin" "$sout" "$capout"; do
		rm -f "$tmpfile"
	done

	for netns in "$ns1" "$ns2" "$ns3" "$ns4"; do
		ip netns del $netns
	done
}
134
# Nothing below works without the "ip" tool: skip the test when it is missing.
if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

# Server/client input and output files, plus the packet capture log.
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)

# From here on, temp files and namespaces are removed on any exit path.
trap cleanup EXIT

# Create the four namespaces and bring up their loopback device; being
# unable to create a namespace is treated as "skip".
for i in "$ns1" "$ns2" "$ns3" "$ns4";do
	if ! ip netns add $i; then
		exit $ksft_skip
	fi
	ip -net $i link set lo up
done
152
153#  "$ns1"              ns2                    ns3                     ns4
154# ns1eth2    ns2eth1   ns2eth3      ns3eth2   ns3eth4       ns4eth3
155#                           - drop 1% ->            reorder 25%
156#                           <- TSO off -
157
158ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
159ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth2 netns "$ns3"
160ip link add ns3eth4 netns "$ns3" type veth peer name ns4eth3 netns "$ns4"
161
162ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
163ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
164
165ip -net "$ns1" link set ns1eth2 up
166ip -net "$ns1" route add default via 10.0.1.2
167ip -net "$ns1" route add default via dead:beef:1::2
168
169ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
170ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
171ip -net "$ns2" link set ns2eth1 up
172
173ip -net "$ns2" addr add 10.0.2.1/24 dev ns2eth3
174ip -net "$ns2" addr add dead:beef:2::1/64 dev ns2eth3 nodad
175ip -net "$ns2" link set ns2eth3 up
176ip -net "$ns2" route add default via 10.0.2.2
177ip -net "$ns2" route add default via dead:beef:2::2
178ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
179ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
180
181ip -net "$ns3" addr add 10.0.2.2/24 dev ns3eth2
182ip -net "$ns3" addr add dead:beef:2::2/64 dev ns3eth2 nodad
183ip -net "$ns3" link set ns3eth2 up
184
185ip -net "$ns3" addr add 10.0.3.2/24 dev ns3eth4
186ip -net "$ns3" addr add dead:beef:3::2/64 dev ns3eth4 nodad
187ip -net "$ns3" link set ns3eth4 up
188ip -net "$ns3" route add default via 10.0.2.1
189ip -net "$ns3" route add default via dead:beef:2::1
190ip netns exec "$ns3" sysctl -q net.ipv4.ip_forward=1
191ip netns exec "$ns3" sysctl -q net.ipv6.conf.all.forwarding=1
192
193ip -net "$ns4" addr add 10.0.3.1/24 dev ns4eth3
194ip -net "$ns4" addr add dead:beef:3::1/64 dev ns4eth3 nodad
195ip -net "$ns4" link set ns4eth3 up
196ip -net "$ns4" route add default via 10.0.3.2
197ip -net "$ns4" route add default via dead:beef:3::2
198
# Apply ethtool feature settings to a device inside a namespace.
# $1: namespace, $2: device, $3: "feature off ..." list (word-split on purpose)
# Prints an INFO line on success; stays silent and returns 1 when ethtool fails.
set_ethtool_flags() {
	local ns="$1"
	local dev="$2"
	local flags="$3"

	if ! ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then
		return 1
	fi

	echo "INFO: set $ns dev $dev: ethtool -K $flags"
}
207
# Randomly disable any combination of tso/gso/gro on a device.
# $1: namespace, $2: device
# The three low bits of one $RANDOM draw select the features; when none is
# selected nothing is changed.
set_random_ethtool_flags() {
	local bits=$RANDOM
	local picked=""

	if (( bits & 1 )); then
		picked="tso off"
	fi
	if (( bits & 2 )); then
		picked="$picked gso off"
	fi
	if (( bits & 4 )); then
		picked="$picked gro off"
	fi

	if [ -z "$picked" ]; then
		return 0
	fi

	set_ethtool_flags "$1" "$2" "$picked"
}
224
# Offload settings for ns3eth2 and ns4eth3: features named with -e take
# precedence, otherwise each device gets its own random selection.
if ! $ethtool_random_on;then
	set_ethtool_flags "$ns3" ns3eth2 "$ethtool_args"
	set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
else
	set_random_ethtool_flags "$ns3" ns3eth2
	set_random_ethtool_flags "$ns4" ns4eth3
fi
232
# Describe a mismatching file: its "ls -l" entry goes to stderr, followed on
# stdout by a header line and the last 27 bytes of the file.
# $1: file to describe
print_file_err()
{
	local path="$1"

	ls -l "$path" >&2
	printf '%s\n' "Trailing bytes are: "
	tail -c 27 "$path"
}
239
# Compare a sent file with what was received.
# $1: file that was sent, $2: file that was received, $3: description
# Returns 0 when both are identical; otherwise prints a FAIL line and the
# details of both files, and returns 1.
check_transfer()
{
	local sent=$1
	local received=$2
	local what=$3

	if cmp "$sent" "$received" > /dev/null 2>&1; then
		return 0
	fi

	echo "[ FAIL ] $what does not match (in, out):"
	print_file_err "$sent"
	print_file_err "$received"

	return 1
}
257
# Verify that net.mptcp.enabled is 1 in a fresh namespace and that, once the
# sysctl is set to 0, creating an MPTCP socket fails with
# "Protocol not available".
# Sets the global ret to 1 and returns 1 on failure, returns 0 on success.
# Exits with $ksft_skip when the scratch namespace cannot be created.
check_mptcp_disabled()
{
	# Named after this run's suffix (rndh), like the other namespaces.
	local disabled_ns="ns_disabled-${rndh}"

	ip netns add "${disabled_ns}" || exit $ksft_skip

	# net.mptcp.enabled should be enabled by default
	if [ "$(ip netns exec "${disabled_ns}" sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
		echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
		# Do not leave the scratch namespace behind on this early return.
		ip netns delete "${disabled_ns}"
		ret=1
		return 1
	fi
	ip netns exec "${disabled_ns}" sysctl -q net.mptcp.enabled=0

	local err=0
	# LANG=C keeps the error text matched by grep untranslated.
	LANG=C ip netns exec "${disabled_ns}" ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
		grep -q "^socket: Protocol not available$" && err=1
	ip netns delete "${disabled_ns}"

	if [ ${err} -eq 0 ]; then
		echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
		ret=1
		return 1
	fi

	echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
	return 0
}
286
# Run "mptcp_connect -u" in a scratch namespace. A non-zero exit status is
# reported as FAIL (setsockopt TCP_ULP "mptcp" allowed), a zero status as OK
# (blocked).
# Sets the global ret to 1 on failure; returns 0 on OK, 1 on FAIL.
# Exits with $ksft_skip when the scratch namespace cannot be created.
check_mptcp_ulp_setsockopt()
{
	local t retval
	# Named after this run's suffix (rndh), like the other namespaces.
	t="ns_ulp-${rndh}"

	ip netns add "${t}" || exit $ksft_skip
	if ! ip netns exec "${t}" ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then
		printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n"
		retval=1
		ret=$retval
	else
		printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n"
		retval=0
	fi
	ip netns del "${t}"
	return $retval
}
304
# $1: IP address
# Succeeds when the address contains a ':' (i.e. looks like IPv6).
# An empty argument is not an IPv6 address and fails.
is_v6()
{
	case "$1" in
	*:*)
		return 0
		;;
	*)
		return 1
		;;
	esac
}
310
# Check basic connectivity with a single ping.
# $1: namespace owning the address, $2: namespace the ping is sent from,
# $3: address to ping
# IPv6 addresses are skipped (success) when IPv6 testing is disabled.
# On failure a message goes to stderr, the global ret is set to 1 and 1 is
# returned.
do_ping()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local connect_addr="$3"
	local ping_args="-q -c 1"

	if is_v6 "${connect_addr}"; then
		$ipv6 || return 0
		ping_args="${ping_args} -6"
	fi

	# ping_args is word-split on purpose: it carries several options.
	if ! ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null; then
		# Report the namespace the ping was actually sent from; the owner of
		# the address is given in parentheses.
		echo "$connector_ns -> $connect_addr (in $listener_ns) connectivity [ FAIL ]" 1>&2
		ret=1

		return 1
	fi

	return 0
}
333
# $1: ns, $2: port
# Poll the namespace's /proc/net/tcp* files, up to 10 times with 0.1s
# between attempts, until an entry whose local port is $2 shows state 0A.
# Always returns 0, whether or not such an entry appeared.
wait_local_port_listen()
{
	local listener_ns="${1}"
	local port="${2}"

	local hex_port attempt

	printf -v hex_port '%04X' "${port}"
	for ((attempt = 0; attempt < 10; attempt++)); do
		if ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
			awk -v pat=":${hex_port}\$" \
				'$2 ~ pat && $4 ~ /0A/ {found=1; exit} END {exit !found}'; then
			break
		fi
		sleep 0.1
	done

	return 0
}
350
# Run one transfer between a listening and a connecting mptcp_connect.
# $1: listener namespace, $2: connector namespace,
# $3: client protocol, $4: server protocol (passed to mptcp_connect -s),
# $5: address the client connects to, $6: address the server binds to
#
# The port is 10000 + TEST_COUNT, and TEST_COUNT is incremented. The server
# reads $sin and writes $sout; the client reads $cin and writes $cout.
# With capture enabled, tcpdump runs in both namespaces for the duration.
# Returns 0 when both processes exit with 0 and both received files match
# what the peer sent, 1 otherwise.
do_transfer()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local cl_proto="$3"
	local srv_proto="$4"
	local connect_addr="$5"
	local local_addr="$6"
	local extra_args=""

	local port
	port=$((10000+$TEST_COUNT))
	TEST_COUNT=$((TEST_COUNT+1))

	if [ "$rcvbuf" -gt 0 ]; then
		extra_args="$extra_args -R $rcvbuf"
	fi

	if [ "$sndbuf" -gt 0 ]; then
		extra_args="$extra_args -S $sndbuf"
	fi

	if [ -n "$testmode" ]; then
		extra_args="$extra_args -m $testmode"
	fi

	# Log the extra options only once per run.
	if [ -n "$extra_args" ] && $options_log; then
		options_log=false
		echo "INFO: extra options: $extra_args"
	fi

	# Start from empty output and capture-log files.
	:> "$cout"
	:> "$sout"
	:> "$capout"

	local addr_port
	addr_port=$(printf "%s:%d" ${connect_addr} ${port})
	printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto}

	if $capture; then
		local capuser
		if [ -z "$SUDO_USER" ] ; then
			capuser=""
		else
			capuser="-Z $SUDO_USER"
		fi

		local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
		# capopt is word-split on purpose: it carries several options.
		local capopt="-i any -s 65535 -B 32768 ${capuser}"

		ip netns exec ${listener_ns}  tcpdump ${capopt} -w "${capfile}-listener.pcap"  >> "${capout}" 2>&1 &
		local cappid_listener=$!

		ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
		local cappid_connector=$!

		# Give tcpdump time to start before traffic flows.
		sleep 1
	fi

	ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
	local spid=$!

	wait_local_port_listen "${listener_ns}" "${port}"

	local start
	start=$(date +%s%3N)
	ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" &
	local cpid=$!

	wait $cpid
	local retc=$?
	wait $spid
	local rets=$?

	local stop
	stop=$(date +%s%3N)

	if $capture; then
		sleep 1
		kill ${cappid_listener}
		kill ${cappid_connector}
	fi

	local duration
	duration=$((stop-start))
	duration=$(printf "(duration %05sms)" $duration)
	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
		echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
		# printf instead of plain echo: the leading "\n" is meant to be a
		# blank line, not the two characters backslash and 'n'.
		printf '\nnetns %s socket stat for %s:\n' "${listener_ns}" "$port" 1>&2
		ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
		printf '\nnetns %s socket stat for %s:\n' "${connector_ns}" "$port" 1>&2
		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"

		cat "$capout"
		return 1
	fi

	check_transfer "$sin" "$cout" "file received by client"
	retc=$?
	check_transfer "$cin" "$sout" "file received by server"
	rets=$?

	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
		echo "$duration [ OK ]"
		cat "$capout"
		return 0
	fi

	cat "$capout"
	return 1
}
462
# Create a payload file filled with random data and terminated by an
# end marker line.
# $1: file to (over)write, $2: label of the sender, used in the log line
# The random part is $filesize bytes long, or a random size when $filesize
# is 0.
make_file()
{
	local name=$1
	local who=$2
	local SIZE=$filesize
	local ksize
	local rem

	if [ "$SIZE" -eq 0 ]; then
		local MAXSIZE=$((1024 * 1024 * 8))
		local MINSIZE=$((1024 * 256))

		# NOTE(review): the modulo is applied after MINSIZE is added, so the
		# result lies in [0, MAXSIZE) and may be smaller than MINSIZE.
		SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE))
	fi

	ksize=$((SIZE / 1024))
	rem=$((SIZE - (ksize * 1024)))

	# Whole KiB blocks first, then the remaining bytes. The remainder must be
	# appended: writing it with conv=notrunc alone would overwrite the start
	# of the file and leave it $rem bytes short.
	dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
	dd if=/dev/urandom bs=1 count=$rem 2> /dev/null >> "$name"
	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"

	echo "Created $name (size $(du -b "$name")) containing data sent by $who"
}
487
# Run the transfers for one listener/connector/address combination.
# $1: listener namespace, $2: connector namespace, $3: connect address,
# $4: 1 to also test when listener and connector are the same namespace
#
# MPTCP -> MPTCP is always run. The MPTCP/TCP fallback pairs are added when
# both ends share a namespace or -t was given; TCP -> TCP needs -t twice.
# The first failing transfer stores its status in the global ret and makes
# the function return 1.
run_tests_lo()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local connect_addr="$3"
	local loopback="$4"
	local bind_addr="0.0.0.0"
	local combos="MPTCP:MPTCP"
	local pair status

	# same-namespace runs are only wanted when explicitly requested
	if [ $loopback -eq 0 ] && [ ${listener_ns} = ${connector_ns} ]; then
		return 0
	fi

	if is_v6 "${connect_addr}"; then
		# nothing to do when v6 tests are disabled
		$ipv6 || return 0
		bind_addr="::"
	fi

	# fallback tcp is only tested on request, or in the loopback case
	if [ $do_tcp -ne 0 ] || [ ${listener_ns} = ${connector_ns} ]; then
		combos="$combos MPTCP:TCP TCP:MPTCP"
		if [ $do_tcp -gt 1 ]; then
			combos="$combos TCP:TCP"
		fi
	fi

	# each entry is "<client proto>:<server proto>"
	for pair in $combos; do
		do_transfer ${listener_ns} ${connector_ns} ${pair%:*} ${pair#*:} ${connect_addr} ${bind_addr}
		status=$?
		if [ $status -ne 0 ]; then
			ret=$status
			return 1
		fi
	done

	return 0
}
552
# Same as run_tests_lo with the loopback flag cleared: the combination is
# skipped when listener and connector are the same namespace.
# $1: listener namespace, $2: connector namespace, $3: connect address
run_tests()
{
	# Quoted so that an empty argument cannot shift the trailing flag into
	# another position.
	run_tests_lo "$1" "$2" "$3" 0
}
557
# Payloads: $cin holds the data sent by the client, $sin by the server.
make_file "$cin" "client"
make_file "$sin" "server"

# Sanity checks that do not need the test topology.
check_mptcp_disabled

check_mptcp_ulp_setsockopt

# Every namespace must reach every configured address before the transfers
# start. Each entry below is "<namespace owning the address> <address>".
echo "INFO: validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
	for target in \
		"$ns1 10.0.1.1" "$ns1 dead:beef:1::1" \
		"$ns2 10.0.1.2" "$ns2 dead:beef:1::2" \
		"$ns2 10.0.2.1" "$ns2 dead:beef:2::1" \
		"$ns3 10.0.2.2" "$ns3 dead:beef:2::2" \
		"$ns3 10.0.3.2" "$ns3 dead:beef:3::2" \
		"$ns4 10.0.3.1" "$ns4 dead:beef:3::1"
	do
		do_ping "${target% *}" $sender "${target#* }"
	done
done
583
# Packet loss is applied on ns2eth3, only when a loss value is set.
if [ -n "$tc_loss" ]; then
	tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss
fi
echo -n "INFO: Using loss of $tc_loss "
if [ "$tc_delay" -gt 0 ]; then
	echo -n "delay $tc_delay ms "
fi

if [ -z "${tc_reorder}" ]; then
	# No -r given: pick random reorder parameters.
	reorder1=$((RANDOM%10))
	reorder1=$((100 - reorder1))
	reorder2=$((RANDOM%100))

	if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
		tc_reorder="reorder ${reorder1}% ${reorder2}%"
		echo -n "$tc_reorder "
	fi
elif [ "$tc_reorder" = "0" ];then
	# "-r 0": reordering explicitly disabled
	tc_reorder=""
elif [ "$tc_delay" -gt 0 ];then
	# reordering requires some delay
	tc_reorder="reorder $tc_reorder"
	echo -n "$tc_reorder "
else
	# No delay, hence no reordering: drop the request instead of handing
	# the bare -r argument to tc without its "reorder" keyword.
	tc_reorder=""
fi

echo "on ns3eth4"

# tc_reorder is word-split on purpose: it holds several netem arguments.
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder
608
# Test matrix: every namespace connects to every address of every namespace.
# The two transfers towards ns1 also run when the sender is ns1 itself and
# abort the whole run if ret is non-zero afterwards.
# NOTE(review): ret is global and also set by failures of the run_tests
# calls below, so a failure in an earlier iteration makes the next
# iteration stop here with the "loopback" message.
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
	run_tests_lo "$ns1" "$sender" 10.0.1.1 1
	if [ $ret -ne 0 ] ;then
		echo "FAIL: Could not even run loopback test" 1>&2
		exit $ret
	fi
	run_tests_lo "$ns1" "$sender" dead:beef:1::1 1
	if [ $ret -ne 0 ] ;then
		# to stderr, like the IPv4 message above
		echo "FAIL: Could not even run loopback v6 test" 1>&2
		exit $ret
	fi

	run_tests "$ns2" "$sender" 10.0.1.2
	run_tests "$ns2" "$sender" dead:beef:1::2
	run_tests "$ns2" "$sender" 10.0.2.1
	run_tests "$ns2" "$sender" dead:beef:2::1

	run_tests "$ns3" "$sender" 10.0.2.2
	run_tests "$ns3" "$sender" dead:beef:2::2
	run_tests "$ns3" "$sender" 10.0.3.2
	run_tests "$ns3" "$sender" dead:beef:3::2

	run_tests "$ns4" "$sender" 10.0.3.1
	run_tests "$ns4" "$sender" dead:beef:3::1
done

# Report the total run time and exit with the accumulated result.
time_end=$(date +%s)
time_run=$((time_end-time_start))

echo "Time: ${time_run} seconds"

exit $ret
641