1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
# Wall-clock start, used for the run-time summary printed at the very end.
time_start=$(date +%s)

# Option specification handed to getopts.
optstring="S:R:d:e:l:r:h4cm:"

# Overall result of the run and the exit code used to report a skipped test.
ret=0
ksft_skip=4

# Scratch files: server input/output and client input/output.
sin=""
sout=""
cin=""
cout=""

# Behaviour switches, all of them can be changed from the command line.
capture=false
timeout=30
ipv6=true
ethtool_random_on=true
testmode=""
sndbuf=0
rcvbuf=0
options_log=true

# Random netem defaults: delay in [0, 399] ms and a loss value drawn from
# [0, 100]; reordering is decided later, once the final delay is known.
tc_delay="$((RANDOM % 400))"
tc_loss=$((RANDOM % 101))
tc_reorder=""

# Turn the drawn integer into a netem percentage: N becomes N/100 percent
# ("0.07%", "0.42%", "1%"), and 0 means that no loss is configured at all.
if ((tc_loss == 100)); then
	tc_loss="1%"
elif ((tc_loss > 0)); then
	printf -v tc_loss '0.%02d%%' "$tc_loss"
else
	tc_loss=""
fi
34
# Print the command line help to stdout.
# The synopsis lists exactly the options accepted by the getopts
# specification used by this script (S: R: d: e: l: r: h 4 c m:).
usage() {
	echo "Usage: $0 [ -h ] [ -4 ] [ -c ] [ -d delay ] [ -l loss ] [ -r reorder ] [ -e feature ] [ -S sndbuf ] [ -R rcvbuf ] [ -m mode ]"
	echo -e "\t-h: show this help text and exit"
	echo -e "\t-d: tc/netem delay in milliseconds, e.g. \"-d 10\" (default random)"
	echo -e "\t-l: tc/netem loss percentage, e.g. \"-l 0.02\" (default random)"
	# the inner quotes must be escaped, otherwise they terminate the string
	# and the help text loses the quotes around "-r 0"
	echo -e "\t-r: tc/netem reorder mode, e.g. \"-r 25% 50% gap 5\", use \"-r 0\" to disable reordering (default random)"
	echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
	echo -e "\t-S: set sndbuf value (default: use kernel default)"
	echo -e "\t-R: set rcvbuf value (default: use kernel default)"
	echo -e "\t-m: test mode (poll, sendfile; default: poll)"
}
47
# Exit with status 1 unless "$2", the argument given to option -$1, is a
# non-negative integer.
require_uint_arg() {
	local opt="$1"
	local value="$2"

	# The value is quoted so that input such as "1 -o 1" is compared as a
	# single word instead of being evaluated as a test expression; test's own
	# diagnostics are dropped in favour of the message below.
	if ! [ "$value" -ge 0 ] 2>/dev/null; then
		echo "-$opt requires numeric argument, got \"$value\"" 1>&2
		exit 1
	fi
}

# Parse the command line options given as arguments and store the result in
# the global configuration variables (tc_delay, tc_loss, tc_reorder,
# ethtool_args, ethtool_random_on, ipv6, capture, sndbuf, rcvbuf, testmode).
# -h prints the help and exits 0; an unknown option prints it and exits 1.
parse_options() {
	local option
	# getopts keeps its position in OPTIND: start from the first argument
	local OPTIND=1

	while getopts "$optstring" option; do
		case "$option" in
		"h")
			usage
			exit 0
			;;
		"d")
			require_uint_arg d "$OPTARG"
			tc_delay="$OPTARG"
			;;
		"e")
			# may be given several times, every feature gets switched off
			ethtool_args="$ethtool_args $OPTARG off"
			ethtool_random_on=false
			;;
		"l")
			tc_loss="$OPTARG"
			;;
		"r")
			tc_reorder="$OPTARG"
			;;
		"4")
			ipv6=false
			;;
		"c")
			capture=true
			;;
		"S")
			require_uint_arg S "$OPTARG"
			sndbuf="$OPTARG"
			;;
		"R")
			require_uint_arg R "$OPTARG"
			rcvbuf="$OPTARG"
			;;
		"m")
			testmode="$OPTARG"
			;;
		"?")
			usage
			exit 1
			;;
		esac
	done
}

parse_options "$@"
103
# Per-run suffix shared by all namespaces: current time in hex plus a random
# six character tag obtained from mktemp.
sec=$(date +%s)
printf -v rndh '%x-%s' "$sec" "$(mktemp -u XXXXXX)"

# The four namespaces that make up the test topology.
ns1="ns1-${rndh}"
ns2="ns2-${rndh}"
ns3="ns3-${rndh}"
ns4="ns4-${rndh}"

# Number of transfers started so far.
TEST_COUNT=0
112
# Remove the scratch files and delete the four test namespaces.
cleanup()
{
	# payload, result and capture-log files
	rm -f "$cin" "$cout" "$sin" "$sout" "$capout"

	local ns
	for ns in "$ns1" "$ns2" "$ns3" "$ns4"; do
		ip netns del "$ns"
	done
}
124
# Everything below relies on the iproute2 "ip" tool.
if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

# Scratch files: data sent by server/client, data received by each peer and
# the log of the optional packet capture. From here on they are removed,
# together with the namespaces, whenever the script exits.
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)
trap cleanup EXIT

# Create the namespaces; not being able to do so skips the whole test.
for i in "$ns1" "$ns2" "$ns3" "$ns4"; do
	if ! ip netns add "$i"; then
		exit $ksft_skip
	fi
	ip -net "$i" link set lo up
done
142
143#  "$ns1"              ns2                    ns3                     ns4
144# ns1eth2    ns2eth1   ns2eth3      ns3eth2   ns3eth4       ns4eth3
145#                           - drop 1% ->            reorder 25%
146#                           <- TSO off -
147
148ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
149ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth2 netns "$ns3"
150ip link add ns3eth4 netns "$ns3" type veth peer name ns4eth3 netns "$ns4"
151
152ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
153ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
154
155ip -net "$ns1" link set ns1eth2 up
156ip -net "$ns1" route add default via 10.0.1.2
157ip -net "$ns1" route add default via dead:beef:1::2
158
159ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
160ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
161ip -net "$ns2" link set ns2eth1 up
162
163ip -net "$ns2" addr add 10.0.2.1/24 dev ns2eth3
164ip -net "$ns2" addr add dead:beef:2::1/64 dev ns2eth3 nodad
165ip -net "$ns2" link set ns2eth3 up
166ip -net "$ns2" route add default via 10.0.2.2
167ip -net "$ns2" route add default via dead:beef:2::2
168ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
169ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
170
171ip -net "$ns3" addr add 10.0.2.2/24 dev ns3eth2
172ip -net "$ns3" addr add dead:beef:2::2/64 dev ns3eth2 nodad
173ip -net "$ns3" link set ns3eth2 up
174
175ip -net "$ns3" addr add 10.0.3.2/24 dev ns3eth4
176ip -net "$ns3" addr add dead:beef:3::2/64 dev ns3eth4 nodad
177ip -net "$ns3" link set ns3eth4 up
178ip -net "$ns3" route add default via 10.0.2.1
179ip -net "$ns3" route add default via dead:beef:2::1
180ip netns exec "$ns3" sysctl -q net.ipv4.ip_forward=1
181ip netns exec "$ns3" sysctl -q net.ipv6.conf.all.forwarding=1
182
183ip -net "$ns4" addr add 10.0.3.1/24 dev ns4eth3
184ip -net "$ns4" addr add dead:beef:3::1/64 dev ns4eth3 nodad
185ip -net "$ns4" link set ns4eth3 up
186ip -net "$ns4" route add default via 10.0.3.2
187ip -net "$ns4" route add default via dead:beef:3::2
188
# Apply "ethtool -K" settings to a device inside a namespace.
# $1: netns, $2: device, $3: settings, e.g. "tso off gso off"
# Prints an INFO line when ethtool succeeded, returns 1 when it failed.
set_ethtool_flags() {
	local netns="$1"
	local iface="$2"
	local features="$3"

	# $features is left unquoted on purpose: it carries several words
	if ! ip netns exec "$netns" ethtool -K "$iface" $features 2>/dev/null; then
		return 1
	fi

	echo "INFO: set $netns dev $iface: ethtool -K $features"
}
197
# Switch off a random subset of tso/gso/gro on a device.
# $1: netns, $2: device
# The three low bits of one random number select tso, gso and gro; nothing
# is done when none of them is set.
set_random_ethtool_flags() {
	local bits=$RANDOM
	local chosen=""

	if ((bits & 1)); then
		chosen="tso off"
	fi
	if ((bits & 2)); then
		chosen="$chosen gso off"
	fi
	if ((bits & 4)); then
		chosen="$chosen gro off"
	fi

	if [ -n "$chosen" ]; then
		set_ethtool_flags "$1" "$2" "$chosen"
	fi
}
214
# Offload settings on ns3eth2 and ns4eth3: the features listed with -e when
# that option was used, a random selection otherwise.
if ! $ethtool_random_on; then
	set_ethtool_flags "$ns3" ns3eth2 "$ethtool_args"
	set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
else
	set_random_ethtool_flags "$ns3" ns3eth2
	set_random_ethtool_flags "$ns4" ns4eth3
fi
222
# Describe a file whose content did not match.
# $1: path of the file
# The "ls -l" listing goes to stderr, the heading and the last 27 bytes of
# the file go to stdout.
print_file_err()
{
	local path="$1"

	ls -l "$path" >&2
	printf '%s\n' "Trailing bytes are: "
	tail -c 27 "$path"
}
229
# Compare the data that was sent with the data that was received.
# $1: file that was sent, $2: file that was received, $3: description used
# in the failure message
# Returns 0 when both files are identical; otherwise reports both files and
# returns 1.
check_transfer()
{
	local sent=$1
	local received=$2
	local label=$3

	if cmp "$sent" "$received" > /dev/null 2>&1; then
		return 0
	fi

	echo "[ FAIL ] $label does not match (in, out):"
	print_file_err "$sent"
	print_file_err "$received"

	return 1
}
247
# Check that net.mptcp.enabled is 1 in a fresh namespace and that setting it
# to 0 prevents the creation of MPTCP sockets.
# Globals: reads rndh, ksft_skip, timeout, cin; sets ret=1 on failure.
# Returns 0 on success and 1 on failure; exits with $ksft_skip when the
# temporary namespace cannot be created.
check_mptcp_disabled()
{
	# $rndh is the per-run suffix also used for the other namespaces
	local disabled_ns="ns_disabled-$rndh"
	local enabled
	local err=0

	ip netns add "${disabled_ns}" || exit $ksft_skip

	# net.mptcp.enabled should be enabled by default
	enabled="$(ip netns exec "${disabled_ns}" sysctl net.mptcp.enabled | awk '{ print $3 }')"
	if [ "$enabled" -ne 1 ]; then
		echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
		# do not leave the temporary namespace behind on this path either
		ip netns delete "${disabled_ns}"
		ret=1
		return 1
	fi
	ip netns exec "${disabled_ns}" sysctl -q net.mptcp.enabled=0

	# LANG=C keeps the error message in the form grep is looking for
	LANG=C ip netns exec "${disabled_ns}" ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
		grep -q "^socket: Protocol not available$" && err=1
	ip netns delete "${disabled_ns}"

	if [ ${err} -eq 0 ]; then
		echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
		ret=1
		return 1
	fi

	echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
	return 0
}
276
# Run "./mptcp_connect -u" in a temporary namespace and report whether
# setsockopt(..., TCP_ULP, "mptcp", ...) was blocked.
# Globals: reads rndh, ksft_skip; sets ret=1 on failure.
# Returns 0 when the helper succeeded ("blocked"), 1 otherwise; exits with
# $ksft_skip when the namespace cannot be created.
check_mptcp_ulp_setsockopt()
{
	# $rndh is the per-run suffix also used for the other namespaces
	local t="ns_ulp-$rndh"
	local retval

	ip netns add "${t}" || exit $ksft_skip
	if ! ip netns exec "${t}" ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then
		printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n"
		retval=1
		ret=$retval
	else
		printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n"
		retval=0
	fi
	ip netns del "${t}"
	return $retval
}
294
# $1: IP address
# Returns 0 when the address contains a ':' (IPv6 notation), 1 otherwise.
# An empty argument is not an IPv6 address.
is_v6()
{
	case "$1" in
	*:*)
		return 0
		;;
	*)
		return 1
		;;
	esac
}
300
# Send a single ping from one namespace to an address.
# $1: name printed in the failure message, $2: netns the ping is run in,
# $3: address to ping
# IPv6 addresses are skipped (return 0) when IPv6 testing is disabled.
# Returns 0 on success; on failure reports on stderr, sets ret=1 and
# returns 1.
do_ping()
{
	local listener_ns="$1"
	local from_ns="$2"
	local target="$3"
	local opts="-q -c 1"

	if is_v6 "${target}"; then
		if ! $ipv6; then
			return 0
		fi
		opts="${opts} -6"
	fi

	# $opts holds several words and therefore stays unquoted
	if ip netns exec "${from_ns}" ping ${opts} "$target" >/dev/null; then
		return 0
	fi

	echo "$listener_ns -> $target connectivity [ FAIL ]" 1>&2
	ret=1

	return 1
}
323
# $1: ns, $2: port
# Poll /proc/net/tcp* from within the namespace, up to 10 times and 0.1s
# apart, until an entry has a local address ending in the port (4 hex
# digits) and a state field matching 0A.
wait_local_port_listen()
{
	local netns="${1}"
	local port="${2}"

	local hex_port attempt

	printf -v hex_port '%04X' "${port}"
	for ((attempt = 0; attempt < 10; attempt++)); do
		# awk exits 0 as soon as a matching line is seen, 1 otherwise
		if ip netns exec "${netns}" cat /proc/net/tcp* | \
			awk -v pat=":${hex_port}"'$' \
				'$2 ~ pat && $4 ~ /0A/ { found = 1; exit } END { exit !found }'; then
			break
		fi
		sleep 0.1
	done
}
340
# Run one transfer: start ./mptcp_connect as listener in one namespace, as
# connector in another one, and compare what each peer received with what
# the other one sent.
# $1: listener netns, $2: connector netns, $3: client protocol,
# $4: server protocol, $5: address to connect to, $6: address to listen on
# Globals: reads timeout, capture, sndbuf, rcvbuf, testmode, sin, sout, cin,
# cout, capout; updates TEST_COUNT and options_log.
# Returns 0 when both peers exited with 0 and both payloads match,
# 1 otherwise.
do_transfer()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local cl_proto="$3"
	local srv_proto="$4"
	local connect_addr="$5"
	local local_addr="$6"
	local extra_args=""

	# every transfer gets its own port
	local port
	port=$((10000+TEST_COUNT))
	TEST_COUNT=$((TEST_COUNT+1))

	if [ "$rcvbuf" -gt 0 ]; then
		extra_args="$extra_args -R $rcvbuf"
	fi

	if [ "$sndbuf" -gt 0 ]; then
		extra_args="$extra_args -S $sndbuf"
	fi

	if [ -n "$testmode" ]; then
		extra_args="$extra_args -m $testmode"
	fi

	# the extra options are the same for all transfers: log them only once
	if [ -n "$extra_args" ] && $options_log; then
		options_log=false
		echo "INFO: extra options: $extra_args"
	fi

	:> "$cout"
	:> "$sout"
	:> "$capout"

	local addr_port
	addr_port=$(printf "%s:%d" "${connect_addr}" "${port}")
	printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" "${connector_ns}" "${cl_proto}" "${listener_ns}" "${addr_port}" "${srv_proto}"

	local cappid=""
	if $capture; then
		local capuser
		if [ -z "$SUDO_USER" ] ; then
			capuser=""
		else
			capuser="-Z $SUDO_USER"
		fi

		local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap"

		# $capuser is either empty or two words, hence unquoted
		ip netns exec "${listener_ns}" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 &
		cappid=$!

		sleep 1
	fi

	# $extra_args and $local_addr are word-split on purpose
	ip netns exec "${listener_ns}" ./mptcp_connect -t $timeout -l -p $port -s "${srv_proto}" $extra_args $local_addr < "$sin" > "$sout" &
	local spid=$!

	wait_local_port_listen "${listener_ns}" "${port}"

	local start
	start=$(date +%s%3N)
	ip netns exec "${connector_ns}" ./mptcp_connect -t $timeout -p $port -s "${cl_proto}" $extra_args "$connect_addr" < "$cin" > "$cout" &
	local cpid=$!

	wait $cpid
	local retc=$?
	wait $spid
	local rets=$?

	local stop
	stop=$(date +%s%3N)

	if $capture; then
		sleep 1
		kill "$cappid"
	fi

	local duration
	duration=$((stop-start))
	duration=$(printf "(duration %05sms)" $duration)
	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
		echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
		# -e is needed for the leading "\n" to become an empty line
		echo -e "\nnetns ${listener_ns} socket stat for $port:" 1>&2
		ip netns exec "${listener_ns}" ss -nita 1>&2 -o "sport = :$port"
		echo -e "\nnetns ${connector_ns} socket stat for $port:" 1>&2
		ip netns exec "${connector_ns}" ss -nita 1>&2 -o "dport = :$port"

		cat "$capout"
		return 1
	fi

	check_transfer "$sin" "$cout" "file received by client"
	retc=$?
	check_transfer "$cin" "$sout" "file received by server"
	rets=$?

	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
		echo "$duration [ OK ]"
		cat "$capout"
		return 0
	fi

	cat "$capout"
	return 1
}
447
# Create a random payload file.
# $1: path of the file, $2: who is going to send it (used in the message)
# The file consists of a random number (0..8191) of random KiB blocks, a
# random tail of 128..1151 bytes, so that the size is usually not a multiple
# of the block size, and a final end marker line.
# Prints the name and the size of the random part of the file.
make_file()
{
	local name=$1
	local who=$2

	local kbytes tail_bytes total

	kbytes=$((RANDOM % (1024 * 8)))
	dd if=/dev/urandom of="$name" bs=1024 count=$kbytes 2> /dev/null

	tail_bytes=$((RANDOM % 1024 + 128))
	# The tail has to be appended: writing it with of= and conv=notrunc
	# would start at offset 0 and overwrite the blocks written above.
	dd if=/dev/urandom bs=1 count=$tail_bytes 2> /dev/null >> "$name"
	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"

	total=$((kbytes * 1024 + tail_bytes))
	echo "Created $name (size $total) containing data sent by $who"
}
467
# Run the transfers for one listener/connector/address combination.
# $1: listener netns, $2: connector netns, $3: address to connect to,
# $4: non-zero when this is a dedicated loopback run
# MPTCP <-> MPTCP is always tested; the two TCP fallback combinations are
# only tested when both peers live in the same namespace.
# Returns 0 when everything passed or was skipped; on the first failing
# transfer its status is stored in ret and 1 is returned.
run_tests_lo()
{
	local srv_ns="$1"
	local cli_ns="$2"
	local addr="$3"
	local loopback_run="$4"
	local same_ns=false
	local bind_addr="0.0.0.0"
	local combos="MPTCP:MPTCP"
	local combo

	if [ "$srv_ns" = "$cli_ns" ]; then
		same_ns=true
	fi

	# skip if test programs are running inside same netns for subsequent runs.
	if $same_ns && [ "$loopback_run" -eq 0 ]; then
		return 0
	fi

	if is_v6 "${addr}"; then
		# skip if we don't want v6
		$ipv6 || return 0
		bind_addr="::"
	fi

	# don't bother testing fallback tcp except for loopback case.
	if $same_ns; then
		combos="$combos MPTCP:TCP TCP:MPTCP"
	fi

	# each entry is "<client protocol>:<server protocol>"
	for combo in $combos; do
		do_transfer "$srv_ns" "$cli_ns" "${combo%%:*}" "${combo##*:}" "$addr" "$bind_addr" || {
			ret=$?
			return 1
		}
	done

	return 0
}
521
# Same as run_tests_lo for a run that is not a dedicated loopback run.
# $1: listener netns, $2: connector netns, $3: address to connect to
run_tests()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local connect_addr="$3"

	run_tests_lo "$listener_ns" "$connector_ns" "$connect_addr" 0
}
526
# Random payloads: "$cin" is sent by the client, "$sin" by the server.
make_file "$cin" "client"
make_file "$sin" "server"

# MPTCP sysctl/setsockopt sanity checks
check_mptcp_disabled

check_mptcp_ulp_setsockopt

# Every namespace has to reach every address of the topology before any
# transfer is attempted. Each entry below is "<owning netns> <address>".
echo "INFO: validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4"; do
	for target in \
		"$ns1 10.0.1.1" \
		"$ns1 dead:beef:1::1" \
		"$ns2 10.0.1.2" \
		"$ns2 dead:beef:1::2" \
		"$ns2 10.0.2.1" \
		"$ns2 dead:beef:2::1" \
		"$ns3 10.0.2.2" \
		"$ns3 dead:beef:2::2" \
		"$ns3 10.0.3.2" \
		"$ns3 dead:beef:3::2" \
		"$ns4 10.0.3.1" \
		"$ns4 dead:beef:3::1"; do
		do_ping "${target% *}" "$sender" "${target#* }"
	done
done
552
# Install the netem qdiscs: optional random loss on ns2eth3, delay and
# optional reordering on ns3eth4, and print a one-line summary.
# Globals: reads ns2, ns3, tc_loss, tc_delay; reads and rewrites tc_reorder,
# which ends up either empty or as a complete "reorder ..." netem argument.
configure_netem()
{
	local reorder1 reorder2

	[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss
	echo -n "INFO: Using loss of $tc_loss "
	[ "$tc_delay" -gt 0 ] && echo -n "delay $tc_delay ms "

	if [ -z "${tc_reorder}" ]; then
		# no -r given: pick random values, which may also mean no reordering
		reorder1=$((RANDOM%10))
		reorder1=$((100 - reorder1))
		reorder2=$((RANDOM%100))

		if [ "$tc_delay" -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
			tc_reorder="reorder ${reorder1}% ${reorder2}%"
			echo -n "$tc_reorder "
		fi
	elif [ "$tc_reorder" = "0" ];then
		tc_reorder=""
	elif [ "$tc_delay" -gt 0 ];then
		# reordering requires some delay
		tc_reorder="reorder $tc_reorder"
		echo -n "$tc_reorder "
	else
		# No delay, hence no reordering: drop the user supplied values, they
		# would otherwise reach tc without the "reorder" keyword.
		tc_reorder=""
	fi

	echo "on ns3eth4"

	# $tc_reorder is empty or several words, hence unquoted
	tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder
}

configure_netem
577
# Main test matrix: every namespace connects to every address of the
# topology. The run is aborted if ret is non-zero after either of the two
# transfers towards ns1, which come first for each sender.
for sender in "$ns1" "$ns2" "$ns3" "$ns4"; do
	run_tests_lo "$ns1" "$sender" 10.0.1.1 1
	if [ $ret -ne 0 ] ;then
		echo "FAIL: Could not even run loopback test" 1>&2
		exit $ret
	fi
	run_tests_lo "$ns1" "$sender" dead:beef:1::1 1
	if [ $ret -ne 0 ] ;then
		# diagnostics belong on stderr, like for the IPv4 case above
		echo "FAIL: Could not even run loopback v6 test" 1>&2
		exit $ret
	fi

	run_tests "$ns2" "$sender" 10.0.1.2
	run_tests "$ns2" "$sender" dead:beef:1::2
	run_tests "$ns2" "$sender" 10.0.2.1
	run_tests "$ns2" "$sender" dead:beef:2::1

	run_tests "$ns3" "$sender" 10.0.2.2
	run_tests "$ns3" "$sender" dead:beef:2::2
	run_tests "$ns3" "$sender" 10.0.3.2
	run_tests "$ns3" "$sender" dead:beef:3::2

	run_tests "$ns4" "$sender" 10.0.3.1
	run_tests "$ns4" "$sender" dead:beef:3::1
done

# Report the total run time and exit with the accumulated result.
time_end=$(date +%s)
time_run=$((time_end-time_start))

echo "Time: ${time_run} seconds"

exit $ret
610