1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
# Per-run suffix: the epoch time in hex, a dash, and a random six character
# tag produced by "mktemp -u" (name only, no file is created).
sec="$(date +%s)"
rndh="$(printf '%x' "$sec")-$(mktemp -u XXXXXX)"

# Names of the three network namespaces used by the test.
ns1="ns1-${rndh}"
ns2="ns2-${rndh}"
ns3="ns3-${rndh}"

# Command line switches: -c turns capture on, -b sets bail to 1.
capture=false
bail=0

# Exit status used when the test has to be skipped.
ksft_skip=4

# timeout_poll is handed to mptcp_connect (-t); timeout_test bounds each
# mptcp_connect invocation through timeout(1).
timeout_poll=30
timeout_test=$((2 * timeout_poll + 1))

# test_cnt is added to 10000 to pick the port of the next transfer,
# ret is the exit status of the whole script.
test_cnt=1
ret=0

# Extra time added on top of the computed transfer time in run_test().
slack=50
17
# Print the command line help to stdout, one line per supported option.
usage() {
	echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -h ]"
	echo -e "\t-b: bail out after first error, otherwise runs all testcases"
	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
	echo -e "\t-d: debug this script"
	echo -e "\t-h: show this help and exit"
}
24
# Remove the temporary files created by setup() and delete the three test
# namespaces. setup() installs this function as the EXIT trap.
cleanup()
{
	local ns

	rm -f "$cout" "$sout" "$large" "$small" "$capout"

	for ns in "$ns1" "$ns2" "$ns3"; do
		ip netns del "$ns"
	done
}
36
# Everything below is driven through the "ip" tool: skip the test when it
# cannot be executed.
if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi
42
#  ns1              ns2                    ns3
44#     ns1eth1    ns2eth1   ns2eth3      ns3eth1
45#            netem
46#     ns1eth2    ns2eth2
47#            netem
48
# Build the test bed: payload and output temp files, the three network
# namespaces, the veth links between them, addresses, routes and the MPTCP
# path manager configuration.
#
# Globals read:    ns1, ns2, ns3, ksft_skip
# Globals written: large, small, sout, cout, capout, size, slack
# Side effects:    installs cleanup() as the EXIT trap; exits with
#                  $ksft_skip when a namespace cannot be created.
setup()
{
	local netns

	large=$(mktemp)
	small=$(mktemp)
	sout=$(mktemp)
	cout=$(mktemp)
	capout=$(mktemp)
	size=$((2 * 2048 * 4096))

	# payloads: "small" is 20 blocks of 4096 bytes, "large" is $size bytes
	dd if=/dev/zero of="$small" bs=4096 count=20 >/dev/null 2>&1
	dd if=/dev/zero of="$large" bs=4096 count=$((size / 4096)) >/dev/null 2>&1

	trap cleanup EXIT

	for netns in "$ns1" "$ns2" "$ns3"; do
		ip netns add "$netns" || exit $ksft_skip
		ip -net "$netns" link set lo up
		ip netns exec "$netns" sysctl -q net.ipv4.conf.all.rp_filter=0
		ip netns exec "$netns" sysctl -q net.ipv4.conf.default.rp_filter=0
	done

	# two links between ns1 and ns2, one link between ns2 and ns3
	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
	ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
	ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"

	# ns1, first link: default routes without an explicit metric
	ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
	ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
	ip -net "$ns1" link set ns1eth1 up mtu 1500
	ip -net "$ns1" route add default via 10.0.1.2
	ip -net "$ns1" route add default via dead:beef:1::2

	# ns1, second link: default routes with metric 101
	ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
	ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
	ip -net "$ns1" link set ns1eth2 up mtu 1500
	ip -net "$ns1" route add default via 10.0.2.2 metric 101
	ip -net "$ns1" route add default via dead:beef:2::2 metric 101

	# ns1 path manager: set the limits and register the address of the
	# second link with the "subflow" flag
	ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
	ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow

	# ns2 sits between ns1 and ns3 and forwards both IPv4 and IPv6
	ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
	ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
	ip -net "$ns2" link set ns2eth1 up mtu 1500

	ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
	ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
	ip -net "$ns2" link set ns2eth2 up mtu 1500

	ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
	ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
	ip -net "$ns2" link set ns2eth3 up mtu 1500
	ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
	ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1

	ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
	ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
	ip -net "$ns3" link set ns3eth1 up mtu 1500
	ip -net "$ns3" route add default via 10.0.3.2
	ip -net "$ns3" route add default via dead:beef:3::2

	ip netns exec "$ns3" ./pm_nl_ctl limits 1 1

	# A debug build can measurably slow down the test program, while the
	# run-time limit is kept quite tight to ensure maximum B/W usage.
	# Use the presence of kmemleak/lockdep/kasan/prove_locking symbols as
	# a rough estimate for this being a debug kernel and increase the
	# maximum run-time accordingly. Observed run times for CI builds
	# running selftests, including kbuild, were used to determine the
	# amount of time to add.
	if grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms; then
		slack=$((slack+550))
	fi
}
121
# Poll /proc/net/tcp* from inside a namespace until a socket bound to the
# given local port is reported with state 0A (listening). Gives up silently
# after 10 attempts spaced 0.1 seconds apart.
# $1: ns, $2: port
wait_local_port_listen()
{
	local listener_ns="${1}"
	local port="${2}"
	local port_hex attempt

	# the proc file reports ports as 4 upper case hex digits
	printf -v port_hex '%04X' "${port}"

	for ((attempt = 0; attempt < 10; attempt++)); do
		if ip netns exec "${listener_ns}" cat /proc/net/tcp* |
		   awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}"; then
			break
		fi
		sleep 0.1
	done
}
138
# Run a single transfer between a listener in ns3 and a connector in ns1
# and check that each side received exactly what the other one sent.
#
# $1: file sent by the client (client stdin)
# $2: file sent by the server (server stdin)
# $3: value passed to mptcp_connect via -T, printed as " max <value> "
#
# Globals read:    cout, sout, capout, capture, rndh, ns1, ns3,
#                  timeout_test, timeout_poll, SUDO_USER
# Globals written: test_cnt (incremented; 10000 + test_cnt is the port)
# Returns: 0 when both peers exit with status 0 and both payload
#          comparisons match, 1 otherwise (after dumping socket stats).
do_transfer()
{
	local cin=$1
	local sin=$2
	local max_time=$3
	local port
	port=$((10000 + test_cnt))
	test_cnt=$((test_cnt + 1))

	# start from empty output files
	: > "$cout"
	: > "$sout"
	: > "$capout"

	if $capture; then
		local -a capopt=(-i any -s 65535 -B 32768)
		# pass the invoking sudo user to tcpdump (-Z) when there is one
		if [ -n "$SUDO_USER" ]; then
			capopt+=(-Z "$SUDO_USER")
		fi

		local capfile="${rndh}-${port}"

		ip netns exec "${ns3}" tcpdump "${capopt[@]}" -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
		local cappid_listener=$!

		ip netns exec "${ns1}" tcpdump "${capopt[@]}" -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
		local cappid_connector=$!

		# give the captures some time to start
		sleep 1
	fi

	timeout "${timeout_test}" \
		ip netns exec "${ns3}" \
			./mptcp_connect -jt "${timeout_poll}" -l -p "$port" -T "$max_time" \
				0.0.0.0 < "$sin" > "$sout" &
	local spid=$!

	# do not start the client before the server socket is listening
	wait_local_port_listen "${ns3}" "${port}"

	timeout "${timeout_test}" \
		ip netns exec "${ns1}" \
			./mptcp_connect -jt "${timeout_poll}" -p "$port" -T "$max_time" \
				10.0.3.3 < "$cin" > "$cout" &
	local cpid=$!

	wait "$cpid"
	local retc=$?
	wait "$spid"
	local rets=$?

	if $capture; then
		sleep 1
		kill "${cappid_listener}"
		kill "${cappid_connector}"
	fi

	# the client must have received the server payload and vice versa
	cmp "$sin" "$cout" > /dev/null 2>&1
	local cmps=$?
	cmp "$cin" "$sout" > /dev/null 2>&1
	local cmpc=$?

	printf "%-16s" " max $max_time "
	if [ "$retc" -eq 0 ] && [ "$rets" -eq 0 ] && \
	   [ "$cmpc" -eq 0 ] && [ "$cmps" -eq 0 ]; then
		echo "[ OK ]"
		cat "$capout"
		return 0
	fi

	echo " [ fail ]"
	echo "client exit code $retc, server $rets" 1>&2
	echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
	ip netns exec "${ns3}" ss -nita 1>&2 -o "sport = :$port"
	echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
	ip netns exec "${ns1}" ss -nita 1>&2 -o "dport = :$port"
	ls -l "$sin" "$cout"
	ls -l "$cin" "$sout"

	cat "$capout"
	return 1
}
222
# Shape the two ns1 <-> ns2 links with netem, then run one transfer in each
# direction with a run-time budget derived from the aggregated link speed.
#
# $1: rate of the first link, in mbit
# $2: rate of the second link, in mbit
# $3: netem delay for the first link, no delay is configured when 0
# $4: netem delay for the second link, no delay is configured when 0
# $5...: test description
#
# Globals read:    ns1, ns2, size, slack, small, large, bail
# Globals written: ret (set to the do_transfer status on failure)
# Exits with $ret on the first failure when bail is not 0.
run_test()
{
	local rate1=$1
	local rate2=$2
	local delay1=$3
	local delay2=$4
	local lret
	local dev
	shift 4
	local msg=$*

	# netem arguments for each link; the delay is added only when positive
	# NOTE(review): the delay value is handed to netem without a unit
	# suffix - check tc's default time unit against the intended delay.
	local -a netem1=(rate "${rate1}mbit")
	local -a netem2=(rate "${rate2}mbit")
	if [ "$delay1" -gt 0 ]; then
		netem1+=(delay "$delay1")
	fi
	if [ "$delay2" -gt 0 ]; then
		netem2+=(delay "$delay2")
	fi

	# drop any qdisc left over from a previous run; failures are expected
	# when there is none, hence the silenced output
	for dev in ns1eth1 ns1eth2; do
		tc -n "$ns1" qdisc del dev "$dev" root >/dev/null 2>&1
	done
	for dev in ns2eth1 ns2eth2; do
		tc -n "$ns2" qdisc del dev "$dev" root >/dev/null 2>&1
	done
	tc -n "$ns1" qdisc add dev ns1eth1 root netem "${netem1[@]}"
	tc -n "$ns1" qdisc add dev ns1eth2 root netem "${netem2[@]}"
	tc -n "$ns2" qdisc add dev ns2eth1 root netem "${netem1[@]}"
	tc -n "$ns2" qdisc add dev ns2eth2 root netem "${netem2[@]}"

	# time is measured in ms, account for transfer size, aggregated link speed
	# and header overhead (10%)
	#              ms    byte -> bit   10%        mbit      -> kbit -> bit  10%
	local time=$((1000 * size  *  8  * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) ))

	# mptcp_connect will do some sleeps to allow the mp_join handshake
	# completion (see mptcp_connect): 200ms on each side, add some slack
	time=$((time + 400 + slack))

	printf "%-60s" "$msg"
	do_transfer "$small" "$large" "$time"
	lret=$?
	if [ "$lret" -ne 0 ]; then
		ret=$lret
		[ "$bail" -eq 0 ] || exit "$ret"
	fi

	printf "%-60s" "$msg - reverse direction"
	do_transfer "$large" "$small" "$time"
	lret=$?
	if [ "$lret" -ne 0 ]; then
		ret=$lret
		[ "$bail" -eq 0 ] || exit "$ret"
	fi
}
273
# Command line parsing: -b and -c only record the request in a global,
# -d enables shell tracing, -h and unknown options print the help and exit
# (with status 0 and 1 respectively).
while getopts "bcdh" option; do
	case "$option" in
	"b") bail=1 ;;
	"c") capture=true ;;
	"d") set -x ;;
	"h")
		usage $0
		exit 0
		;;
	"?")
		usage $0
		exit 1
		;;
	esac
done
295
setup

# One entry per scenario, holding the run_test arguments in order:
# "<rate1> <rate2> <delay1> <delay2> <description>"
scenarios=(
	"10 10 0 0 balanced bwidth"
	"10 10 1 50 balanced bwidth with unbalanced delay"

	# we still need some additional infrastructure to pass the following test-cases
	"30 10 0 0 unbalanced bwidth"
	"30 10 1 50 unbalanced bwidth with unbalanced delay"
	"30 10 50 1 unbalanced bwidth with opposed, unbalanced delay"
)

for scenario in "${scenarios[@]}"; do
	# the last variable receives the whole remaining description
	read -r bw1 bw2 lat1 lat2 descr <<< "$scenario"
	run_test "$bw1" "$bw2" "$lat1" "$lat2" "$descr"
done
exit $ret
305