1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4. "$(dirname "${0}")/mptcp_lib.sh"
5
# Per-run suffix shared by the three namespace names: the current epoch in
# hex followed by a random six-character tag from mktemp.
sec=$(date +%s)
printf -v rndh '%x-%s' "$sec" "$(mktemp -u XXXXXX)"
ns1="ns1-${rndh}"
ns2="ns2-${rndh}"
ns3="ns3-${rndh}"

# Switches toggled by the command-line options (-c and -b respectively).
capture=false
bail=0

# Result bookkeeping: exit code used when the test has to be skipped, the
# running test counter (also used to derive the TCP port) and the overall
# script result.
ksft_skip=4
test_cnt=1
ret=0

# Timing knobs: poll timeout handed to mptcp_connect, the hard limit
# enforced through timeout(1), and the extra run-time allowance (ms) added
# on top of the computed transfer time.
timeout_poll=30
timeout_test=$((2 * timeout_poll + 1))
slack=50
19
# Print the command-line help for this script on stdout.
# Arguments passed by callers are ignored; the script name comes from $0.
usage() {
	echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
	echo -e "\t-b: bail out after first error, otherwise runs all testcases"
	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
	echo -e "\t-d: debug this script"
}
26
# Remove the temporary files and delete the three test network namespaces.
# Registered as the EXIT trap by setup().
# Globals read: cout, sout, large, small, capout, ns1, ns2, ns3
cleanup()
{
	rm -f "$cout" "$sout"
	rm -f "$large" "$small"
	rm -f "$capout"

	local netns
	for netns in "$ns1" "$ns2" "$ns3"; do
		# quoted so the name always reaches ip as exactly one argument
		ip netns del "$netns"
	done
}
38
# Prerequisite checks, run before anything is created.
# NOTE(review): mptcp_lib_check_mptcp is not defined in this file; presumably
# it comes from the mptcp_lib.sh sourced above and verifies MPTCP support.
mptcp_lib_check_mptcp

# All of the namespace plumbing below relies on the ip tool; skip without it.
if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi
46
#  ns1                 ns2                    ns3
48#     ns1eth1    ns2eth1   ns2eth3      ns3eth1
49#            netem
50#     ns1eth2    ns2eth2
51#            netem
52
# Create the temporary files and the three-namespace topology shared by all
# test cases, and register cleanup() as the EXIT trap.
#
# Globals written: large, small, sout, cout, capout (temporary files),
#                  size (byte size of the large payload),
#                  slack (increased when debug-kernel symbols are found)
# Globals read:    ns1, ns2, ns3, ksft_skip
# Exits with $ksft_skip when a namespace cannot be created.
setup()
{
	large=$(mktemp)
	small=$(mktemp)
	sout=$(mktemp)
	cout=$(mktemp)
	capout=$(mktemp)
	size=$((2 * 2048 * 4096))

	# payloads: 20 blocks for the small file, $size bytes for the large one
	dd if=/dev/zero of="$small" bs=4096 count=20 >/dev/null 2>&1
	dd if=/dev/zero of="$large" bs=4096 count=$((size / 4096)) >/dev/null 2>&1

	trap cleanup EXIT

	# loop variable is local so it does not leak into the global scope
	local netns
	for netns in "$ns1" "$ns2" "$ns3"; do
		ip netns add "$netns" || exit "$ksft_skip"
		ip -net "$netns" link set lo up
		ip netns exec "$netns" sysctl -q net.ipv4.conf.all.rp_filter=0
		ip netns exec "$netns" sysctl -q net.ipv4.conf.default.rp_filter=0
	done

	# two veth links between ns1 and ns2, one between ns2 and ns3
	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
	ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
	ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"

	ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
	ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
	ip -net "$ns1" link set ns1eth1 up mtu 1500
	ip -net "$ns1" route add default via 10.0.1.2
	ip -net "$ns1" route add default via dead:beef:1::2

	# second ns1 path: default routes with an explicit metric of 101
	ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
	ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
	ip -net "$ns1" link set ns1eth2 up mtu 1500
	ip -net "$ns1" route add default via 10.0.2.2 metric 101
	ip -net "$ns1" route add default via dead:beef:2::2 metric 101

	ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
	ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow

	ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
	ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
	ip -net "$ns2" link set ns2eth1 up mtu 1500

	ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
	ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
	ip -net "$ns2" link set ns2eth2 up mtu 1500

	# ns2 forwards between the ns1-facing links and the ns3-facing link
	ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
	ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
	ip -net "$ns2" link set ns2eth3 up mtu 1500
	ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
	ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1

	ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
	ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
	ip -net "$ns3" link set ns3eth1 up mtu 1500
	ip -net "$ns3" route add default via 10.0.3.2
	ip -net "$ns3" route add default via dead:beef:3::2

	ip netns exec "$ns3" ./pm_nl_ctl limits 1 1

	# A debug build can measurably slow down the test program, and the
	# run-time limit is deliberately tight to ensure maximum B/W usage.
	# Use kmemleak/lockdep/kasan/prove_locking presence as a rough
	# estimate for this being a debug kernel and increase the
	# maximum run-time accordingly. Observed run times for CI builds
	# running selftests, including kbuild, were used to determine the
	# amount of time to add.
	grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550))
}
125
# Poll /proc/net/tcp* inside a namespace until some socket bound to the given
# port shows state 0A (TCP LISTEN); gives up silently after 10 attempts
# spaced 0.1s apart.
# $1: ns, $2: port
wait_local_port_listen()
{
	local listener_ns="${1}"
	local port="${2}"
	local hex attempt

	# the proc files print ports as four upper-case hex digits
	printf -v hex '%04X' "${port}"

	for ((attempt = 0; attempt < 10; attempt++)); do
		if ip netns exec "${listener_ns}" cat /proc/net/tcp* |
		   awk -v hex="${hex}" '
			BEGIN { rc = 1 }
			$2 ~ (":" hex "$") && $4 ~ /0A/ { rc = 0; exit }
			END { exit rc }'; then
			return 0
		fi
		sleep 0.1
	done
}
142
# Run one transfer between an mptcp_connect listener in ns3 and an
# mptcp_connect connector in ns1, then check that each side received exactly
# what the other side sent.
#
# $1: file the connector (client) sends
# $2: file the listener (server) sends
# $3: time limit handed to mptcp_connect via -T (run_test computes it in ms)
#
# Globals read:    cout, sout, capout, capture, rndh, ns1, ns3,
#                  timeout_test, timeout_poll, SUDO_USER
# Globals written: test_cnt (incremented; the port is 10000 + its old value)
#
# Prints " max <time> " followed by "[ OK ]" or " [ fail ]" on stdout; on
# failure the exit codes and socket statistics go to stderr.
# Returns 0 when both processes exit 0 and both payloads match, 1 otherwise.
do_transfer()
{
	local cin=$1
	local sin=$2
	local max_time=$3
	local port
	port=$((10000 + test_cnt))
	test_cnt=$((test_cnt + 1))

	# start from empty output/capture-log files
	:> "$cout"
	:> "$sout"
	:> "$capout"

	if $capture; then
		local capfile="${rndh}-${port}"
		# An array keeps every tcpdump option a separate word without
		# depending on word splitting of an unquoted string.
		local -a capopt=(-i any -s 65535 -B 32768)
		if [ -n "$SUDO_USER" ]; then
			capopt+=(-Z "$SUDO_USER")
		fi

		ip netns exec "${ns3}" tcpdump "${capopt[@]}" -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
		local cappid_listener=$!

		ip netns exec "${ns1}" tcpdump "${capopt[@]}" -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
		local cappid_connector=$!

		# give both captures time to start
		sleep 1
	fi

	timeout "${timeout_test}" \
		ip netns exec "${ns3}" \
			./mptcp_connect -jt "${timeout_poll}" -l -p "$port" -T "$max_time" \
				0.0.0.0 < "$sin" > "$sout" &
	local spid=$!

	# do not start the connector before the listener socket exists
	wait_local_port_listen "${ns3}" "${port}"

	timeout "${timeout_test}" \
		ip netns exec "${ns1}" \
			./mptcp_connect -jt "${timeout_poll}" -p "$port" -T "$max_time" \
				10.0.3.3 < "$cin" > "$cout" &
	local cpid=$!

	wait "$cpid"
	local retc=$?
	wait "$spid"
	local rets=$?

	if $capture; then
		sleep 1
		kill "${cappid_listener}"
		kill "${cappid_connector}"
	fi

	# what the client received must equal what the server sent and vice
	# versa; paths are quoted so names containing whitespace still compare
	cmp "$sin" "$cout" > /dev/null 2>&1
	local cmps=$?
	cmp "$cin" "$sout" > /dev/null 2>&1
	local cmpc=$?

	printf "%-16s" " max $max_time "
	if [ "$retc" -eq 0 ] && [ "$rets" -eq 0 ] && \
	   [ "$cmpc" -eq 0 ] && [ "$cmps" -eq 0 ]; then
		echo "[ OK ]"
		cat "$capout"
		return 0
	fi

	echo " [ fail ]"
	echo "client exit code $retc, server $rets" 1>&2
	echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
	ip netns exec "${ns3}" ss -nita -o "sport = :$port" 1>&2
	echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
	ip netns exec "${ns1}" ss -nita -o "dport = :$port" 1>&2
	ls -l "$sin" "$cout"
	ls -l "$cin" "$sout"

	cat "$capout"
	return 1
}
226
# Shape the two ns1<->ns2 links with netem and run one transfer in each
# direction, within a time budget derived from the aggregate link rate.
#
# $1: rate of the first link (mbit)    $2: rate of the second link (mbit)
# $3: netem delay on the first link    $4: netem delay on the second link
#     (a delay that is not greater than 0 adds no delay option)
# $5...: test description, printed as the first output column
#
# Globals read:    ns1, ns2, size, slack, small, large, bail
# Globals written: ret (set to the status of a failing transfer)
# Exits with that status straight away when bail is non-zero.
run_test()
{
	local rate1=$1
	local rate2=$2
	local delay1=$3
	local delay2=$4
	local lret
	local dev
	shift 4
	local msg=$*

	# Arrays keep every netem argument a separate word without relying on
	# word splitting of an unquoted "delay N" string.
	# NOTE(review): tc treats a bare number as microseconds; confirm that
	# this is the intended unit for the delays passed by the callers.
	local -a netem1=(rate "${rate1}mbit")
	local -a netem2=(rate "${rate2}mbit")
	if [ "$delay1" -gt 0 ]; then
		netem1+=(delay "$delay1")
	fi
	if [ "$delay2" -gt 0 ]; then
		netem2+=(delay "$delay2")
	fi

	# drop any qdisc left over from the previous test case; failures (no
	# qdisc installed yet) are expected and silenced
	for dev in ns1eth1 ns1eth2; do
		tc -n "$ns1" qdisc del dev "$dev" root >/dev/null 2>&1
	done
	for dev in ns2eth1 ns2eth2; do
		tc -n "$ns2" qdisc del dev "$dev" root >/dev/null 2>&1
	done
	tc -n "$ns1" qdisc add dev ns1eth1 root netem "${netem1[@]}"
	tc -n "$ns1" qdisc add dev ns1eth2 root netem "${netem2[@]}"
	tc -n "$ns2" qdisc add dev ns2eth1 root netem "${netem1[@]}"
	tc -n "$ns2" qdisc add dev ns2eth2 root netem "${netem2[@]}"

	# time is measured in ms, account for transfer size, aggregated link speed
	# and header overhead (10%)
	#              ms    byte -> bit   10%        mbit      -> kbit -> bit  10%
	local time=$((1000 * size  *  8  * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) ))

	# mptcp_connect will do some sleeps to allow the mp_join handshake
	# completion (see mptcp_connect): 200ms on each side, add some slack
	time=$((time + 400 + slack))

	printf "%-60s" "$msg"
	do_transfer "$small" "$large" "$time"
	lret=$?
	if [ "$lret" -ne 0 ]; then
		ret=$lret
		[ "$bail" -eq 0 ] || exit "$ret"
	fi

	printf "%-60s" "$msg - reverse direction"
	do_transfer "$large" "$small" "$time"
	lret=$?
	if [ "$lret" -ne 0 ]; then
		ret=$lret
		[ "$bail" -eq 0 ] || exit "$ret"
	fi
}
277
# Command-line handling: -b bail on first failure, -c capture packets,
# -d trace the script, -h help; anything else prints the help and fails.
while getopts "bcdh" option; do
	case "$option" in
	b)
		bail=1
		;;
	c)
		capture=true
		;;
	d)
		set -x
		;;
	h)
		usage "$0"
		exit 0
		;;
	\?)
		usage "$0"
		exit 1
		;;
	esac
done

# Build the topology once, then run every scenario on top of it.
# run_test arguments: rate1 rate2 delay1 delay2 description
setup
run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 50 "balanced bwidth with unbalanced delay"

# we still need some additional infrastructure to pass the following test-cases
run_test 30 10 0 0 "unbalanced bwidth"
run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"

# overall result: 0, or the status of the last failing transfer
exit $ret
309