1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
4# Check that route PMTU values match expectations, and that initial device MTU
5# values are assigned correctly
6#
7# Tests currently implemented:
8#
9# - pmtu_vti4_exception
10#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
11#	namespaces with matching endpoints. Check that route exception is not
12#	created if link layer MTU is not exceeded, then exceed it and check that
13#	exception is created with the expected PMTU. The approach described
14#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
15#	changes alone won't affect PMTU
16#
17# - pmtu_vti6_exception
18#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
19#	namespaces with matching endpoints. Check that route exception is
20#	created by exceeding link layer MTU with ping to other endpoint. Then
21#	decrease and increase MTU of tunnel, checking that route exception PMTU
22#	changes accordingly
23#
24# - pmtu_vti4_default_mtu
25#	Set up vti4 tunnel on top of veth, in two namespaces with matching
26#	endpoints. Check that MTU assigned to vti interface is the MTU of the
27#	lower layer (veth) minus additional lower layer headers (zero, for veth)
28#	minus IPv4 header length
29#
30# - pmtu_vti6_default_mtu
31#	Same as above, for IPv6
32#
33# - pmtu_vti4_link_add_mtu
34#	Set up vti4 interface passing MTU value at link creation, check MTU is
35#	configured, and that link is not created with invalid MTU values
36#
37# - pmtu_vti6_link_add_mtu
38#	Same as above, for IPv6
39#
40# - pmtu_vti6_link_change_mtu
41#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
42#	and check that configured MTU is used on link creation and changes, and
43#	that MTU is properly calculated instead when MTU is not configured from
44#	userspace
45
# Table of tests, one per line: name of the test function (without the
# "test_" prefix) followed by a human-readable description
tests="
	pmtu_vti6_exception		vti6: PMTU exceptions
	pmtu_vti4_exception		vti4: PMTU exceptions
	pmtu_vti4_default_mtu		vti4: default MTU assignment
	pmtu_vti6_default_mtu		vti6: default MTU assignment
	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"

# Randomly-suffixed namespace names, and the command prefixes used to run
# commands inside each of them
NS_A="ns-$(mktemp -u XXXXXX)"
NS_B="ns-$(mktemp -u XXXXXX)"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"

# Addresses and prefix lengths for the veth pair
veth4_a_addr=192.168.1.1
veth4_b_addr=192.168.1.2
veth4_mask=24
veth6_a_addr=fd00:1::a
veth6_b_addr=fd00:1::b
veth6_mask=64

# Addresses and prefix lengths for the vti tunnel endpoints
vti4_a_addr=192.168.2.1
vti4_b_addr=192.168.2.2
vti4_mask=24
vti6_a_addr=fd00:2::a
vti6_b_addr=fd00:2::b
vti6_mask=64

# Addresses and prefix length for the dummy interfaces
dummy6_0_addr=fc00:1000::0
dummy6_1_addr=fc00:1001::0
dummy6_mask=64

# cleanup_done is 1 whenever there is nothing for cleanup() to remove;
# err_buf accumulates messages queued by err() until err_flush() prints them
cleanup_done=1
err_buf=""
80
# Queue one diagnostic line: append the message in $1, followed by a newline,
# to err_buf. Nothing is printed here.
err() {
	err_nl='
'
	err_buf="${err_buf}${1}${err_nl}"
}
85
# Print every message queued in err_buf, exactly as stored, then empty the
# buffer.
#
# printf '%s' is used instead of 'echo -n': this script runs under /bin/sh,
# where echo's handling of options and backslashes varies between shells, so
# a buffer such as "-e" or one containing backslashes could be mangled.
err_flush() {
	printf '%s' "${err_buf}"
	err_buf=
}
90
# Create the two network namespaces named by NS_A and NS_B.
# Returns 1 if the first one can't be created, otherwise the status of the
# second creation.
setup_namespaces() {
	if ! ip netns add ${NS_A}; then
		return 1
	fi
	ip netns add ${NS_B}
}
95
# Create a veth pair with veth_a in namespace A and veth_b in namespace B,
# assign the IPv4 and IPv6 veth addresses to both ends and bring them up.
# Returns 1 if the pair can't be created, otherwise the status of the last
# command.
setup_veth() {
	if ! ${ns_a} ip link add veth_a type veth peer name veth_b; then
		return 1
	fi
	# Both ends are created in namespace A: move the peer to B
	${ns_a} ip link set veth_b netns ${NS_B}

	# IPv4 addresses
	${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
	${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b

	# IPv6 addresses
	${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
	${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b

	${ns_a} ip link set veth_a up
	${ns_b} ip link set veth_b up
}
109
# Create a pair of vti/vti6 tunnel interfaces, one per namespace, with
# mirrored local/remote endpoints and key 10, then address them and bring
# them up.
#	$1: IP version: 6 selects link type "vti6", anything else "vti"
#	$2: lower-layer address in namespace A
#	$3: lower-layer address in namespace B
#	$4: tunnel address in namespace A
#	$5: tunnel address in namespace B
#	$6: prefix length for tunnel addresses
# Returns 1 if the first tunnel can't be created.
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	vti_dev_a="vti${proto}_a"
	vti_dev_b="vti${proto}_b"

	if ! ${ns_a} ip link add ${vti_dev_a} type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10; then
		return 1
	fi
	${ns_b} ip link add ${vti_dev_b} type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10

	${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev ${vti_dev_a}
	${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev ${vti_dev_b}

	${ns_a} ip link set ${vti_dev_a} up
	${ns_b} ip link set ${vti_dev_b} up

	# Pause for a second before returning to the caller
	sleep 1
}
131
# Set up the IPv4 (vti) tunnel pair on top of the IPv4 veth addresses
setup_vti4() {
	setup_vti 4 \
		${veth4_a_addr} ${veth4_b_addr} \
		${vti4_a_addr} ${vti4_b_addr} \
		${vti4_mask}
}
135
# Set up the IPv6 (vti6) tunnel pair on top of the IPv6 veth addresses
setup_vti6() {
	setup_vti 6 \
		${veth6_a_addr} ${veth6_b_addr} \
		${vti6_a_addr} ${vti6_b_addr} \
		${vti6_mask}
}
139
# Install matching xfrm states and policies in both namespaces: ESP in tunnel
# mode with rfc4106(gcm(aes)), one state per direction (SPI 0x1000 from A to
# B, SPI 0x1001 from B to A), and in/out policies with mark 10.
#	$1: IP version, passed to ip as "-$1"
#	$2: tunnel endpoint address in namespace A
#	$3: tunnel endpoint address in namespace B
# Returns 1 if the first state can't be added, otherwise the status of the
# last command.
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	# Command prefixes and selectors shared by the commands below; all of
	# them are expanded unquoted on purpose, to be split into words
	xfrm_in_a="${ns_a} ip -${proto} xfrm"
	xfrm_in_b="${ns_b} ip -${proto} xfrm"
	xfrm_a_to_b="src ${veth_a_addr} dst ${veth_b_addr}"
	xfrm_b_to_a="src ${veth_b_addr} dst ${veth_a_addr}"
	xfrm_sa="proto esp aead rfc4106(gcm(aes)) 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel"
	xfrm_tmpl="proto esp mode tunnel"

	# Namespace A: outbound is A to B
	if ! ${xfrm_in_a} state add ${xfrm_a_to_b} spi 0x1000 ${xfrm_sa}; then
		return 1
	fi
	${xfrm_in_a} state add ${xfrm_b_to_a} spi 0x1001 ${xfrm_sa}
	${xfrm_in_a} policy add dir out mark 10 tmpl ${xfrm_a_to_b} ${xfrm_tmpl}
	${xfrm_in_a} policy add dir in mark 10 tmpl ${xfrm_b_to_a} ${xfrm_tmpl}

	# Namespace B: same states, policies mirrored (outbound is B to A)
	${xfrm_in_b} state add ${xfrm_a_to_b} spi 0x1000 ${xfrm_sa}
	${xfrm_in_b} state add ${xfrm_b_to_a} spi 0x1001 ${xfrm_sa}
	${xfrm_in_b} policy add dir out mark 10 tmpl ${xfrm_b_to_a} ${xfrm_tmpl}
	${xfrm_in_b} policy add dir in mark 10 tmpl ${xfrm_a_to_b} ${xfrm_tmpl}
}
155
# Install xfrm states and policies between the IPv4 veth addresses
setup_xfrm4() {
	setup_xfrm 4 \
		${veth4_a_addr} \
		${veth4_b_addr}
}
159
# Install xfrm states and policies between the IPv6 veth addresses
setup_xfrm6() {
	setup_xfrm 6 \
		${veth6_a_addr} \
		${veth6_b_addr}
}
163
# Run the setup steps named by the arguments, in order: each argument NAME
# invokes the function setup_NAME.
# Returns 1, with a message on standard output, if not running as root or as
# soon as one step fails. cleanup_done is cleared before the first step, so
# that cleanup() will act from then on.
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo "  need to run as root"
		return 1
	fi

	cleanup_done=0
	for step in "$@"; do
		if ! eval setup_${step}; then
			echo "  ${step} not supported"
			return 1
		fi
	done
}
172
# Delete both test namespaces, unless cleanup_done says there is nothing to
# remove, and mark cleanup as done.
#
# Errors from ip are discarded: a namespace might not exist if setup failed
# early. Note that the redirection must be spelled "2>" with no space:
# "2 > /dev/null" would pass a stray "2" argument to ip and redirect
# standard output instead of standard error.
cleanup() {
	if [ "${cleanup_done}" -eq 1 ]; then
		return
	fi
	ip netns del ${NS_A} 2> /dev/null
	ip netns del ${NS_B} 2> /dev/null
	cleanup_done=1
}
179
# Set the MTU of a network device.
#	$1: command prefix used to run ip (split into words on purpose)
#	$2: device name
#	$3: MTU value
mtu() {
	${1} ip link set dev ${2} mtu ${3}
}
187
# Print the word following the first "mtu" word found in $1, if any. $1 is
# split into words by the shell, so it can span several lines. Nothing is
# printed if there is no "mtu" word, or if it's the last one.
mtu_parse() {
	after_mtu=0
	for word in ${1}; do
		if [ ${after_mtu} -eq 1 ]; then
			echo "${word}"
			return
		fi
		[ "${word}" = "mtu" ] && after_mtu=1
	done
}
197
# Show a link, as reported by "ip link show".
#	$1: command prefix used to run ip (split into words on purpose)
#	$2: device name
link_get() {
	${1} ip link show dev "${2}"
}
204
# Print the MTU of a link, parsed from the output of link_get.
#	$1: command prefix used to run ip
#	$2: device name
link_get_mtu() {
	link_info="$(link_get "${1}" ${2})"
	mtu_parse "${link_info}"
}
211
# Show the route used to reach a destination, as reported by "ip route get".
#	$1: command prefix used to run ip (split into words on purpose)
#	$2: destination address
route_get_dst_exception() {
	${1} ip route get "${2}"
}
218
# Print the PMTU reported for the route to a destination, parsed from the
# output of route_get_dst_exception. Nothing is printed if that output
# carries no "mtu" value.
#	$1: command prefix used to run ip
#	$2: destination address
route_get_dst_pmtu_from_exception() {
	route_info="$(route_get_dst_exception "${1}" ${2})"
	mtu_parse "${route_info}"
}
225
# vti4: check that no route exception is created as long as the link layer
# MTU is not exceeded, and that exceeding it by one byte creates an exception
# carrying the expected PMTU.
# Returns 0 on success, 1 on failure (with a message queued by err), 2 if
# setup fails.
test_pmtu_vti4_exception() {
	if ! setup namespaces veth vti4 xfrm4; then
		return 2
	fi

	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	# Bytes taken from the tunnel MTU by ESP with rfc4106:
	# SPI (4), SN (4), IV (8), ICV (16), pad length (1), next header (1)
	esp_overhead=$((4 + 4 + 8 + 16 + 1 + 1))
	esp_payload_rfc4106=$((vti_mtu - esp_overhead))
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# A DF packet that still fits the link layer MTU must not create an
	# exception
	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	if [ -n "${pmtu}" ]; then
		err "  unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
		return 1
	fi

	# One more byte exceeds the link layer MTU: now an exception is
	# expected
	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	if [ -z "${pmtu}" ]; then
		err "  exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
		return 1
	fi

	# The exception must carry the right PMTU value
	if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
		err "  wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
		return 1
	fi
}
264
# vti6: check that exceeding the link layer MTU creates a route exception,
# then that decreasing and increasing the tunnel MTU is reflected in the PMTU
# of that exception.
# Returns 0 on success, 1 on failure (with messages queued by err), 2 if
# setup fails.
test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	fail=0

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" = "" ]; then
		err "  tunnel exceeding link layer MTU didn't create route exception"
		return 1
	fi

	# Decrease tunnel MTU, check for PMTU decrease in route exception.
	# The value is compared as a string: a numeric test on an empty value
	# (exception gone) would make [ fail with an error, which the "if"
	# would take as "no mismatch", hiding the failure.
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" != "3000" ]; then
		err "  decreasing tunnel MTU didn't decrease route exception PMTU"
		fail=1
	fi

	# Increase tunnel MTU, check for PMTU increase in route exception
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" != "9000" ]; then
		err "  increasing tunnel MTU didn't increase route exception PMTU"
		fail=1
	fi

	return ${fail}
}
299
# vti4: check that the default MTU of the vti device is the MTU of the
# underlying veth minus the IPv4 header length (20 bytes).
# Returns 0 on success, 1 on failure (with a message queued by err), 2 if
# setup fails.
test_pmtu_vti4_default_mtu() {
	if ! setup namespaces veth vti4; then
		return 2
	fi

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"

	# The difference must be exactly one IPv4 header
	[ $((veth_mtu - vti4_mtu)) -eq 20 ] && return 0

	err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
	return 1
}
311
# vti6: check that the default MTU of the vti6 device is the MTU of the
# underlying veth minus the IPv6 header length (40 bytes).
# Returns 0 on success, 1 on failure (with a message queued by err), 2 if
# setup fails.
test_pmtu_vti6_default_mtu() {
	if ! setup namespaces veth vti6; then
		return 2
	fi

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"

	# The difference must be exactly one IPv6 header
	[ $((veth_mtu - vti6_mtu)) -eq 40 ] && return 0

	err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
	return 1
}
323
# vti4: check that an MTU passed on link creation is applied, and that
# out-of-range values either make creation fail or get adjusted into the
# valid range.
# Returns 0 on success, 1 on failure (with messages queued by err), 2 if
# setup fails or a vti link can't be created at all.
test_pmtu_vti4_link_add_mtu() {
	if ! setup namespaces; then
		return 2
	fi

	# Probe for vti support with a plain link creation first
	if ! ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10; then
		err "  vti not supported"
		return 2
	fi
	${ns_a} ip link del vti4_a

	fail=0
	min=68
	max=$((65528 - 20))

	# Values just outside the valid range
	for v in $((min - 1)) $((max + 1)); do
		# Creation is allowed to fail here...
		if ! ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null; then
			continue
		fi
		# ...but if it succeeds, the resulting MTU must be a valid one
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
			err "  vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti4_a
	done

	# Valid values: range boundaries and one value in between
	for v in ${min} 1300 ${max}; do
		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		${ns_a} ip link del vti4_a
		[ "${mtu}" = "${v}" ] && continue
		err "  vti MTU ${mtu} doesn't match configured value ${v}"
		fail=1
	done

	return ${fail}
}
361
# vti6: check that an MTU passed on link creation is applied, and that
# out-of-range values either make creation fail or get adjusted into the
# valid range.
# Returns 0 on success, 1 on failure (with messages queued by err), 2 if
# setup fails or a vti6 link can't be created at all.
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti6 support with a plain link creation first
	${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
	${ns_a} ip link del vti6_a

	fail=0

	min=1280
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
		# This can fail, or MTU can be adjusted to a proper value
		[ $? -ne 0 ] && continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
			# Report the MTU the link actually ended up with, which
			# is the value found to be invalid, as the vti4 test
			# does
			err "  vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti6_a
	done

	# Now check valid values: range boundaries and one value in between
	for v in ${min} 1300 ${max}; do
		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
399
# vti6: with two dummy devices having different MTUs, check that an MTU
# configured from userspace is used on link creation and on link changes, and
# that the MTU is calculated from the lower device when none is configured.
# Returns 0 on success, 1 on failure (with messages queued by err), 2 if
# setup fails or dummy devices can't be created.
#
# MTU values are compared as quoted strings: if the MTU can't be read (for
# example because the vti6 link could not be created), an unquoted numeric
# test would make [ fail with an error, which the "if" would take as "no
# mismatch", and the test would wrongly pass.
test_pmtu_vti6_link_change_mtu() {
	setup namespaces || return 2

	${ns_a} ip link add dummy0 mtu 1500 type dummy
	[ $? -ne 0 ] && err "  dummy not supported" && return 2
	${ns_a} ip link add dummy1 mtu 3000 type dummy
	${ns_a} ip link set dummy0 up
	${ns_a} ip link set dummy1 up

	${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
	${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1

	fail=0

	# Create vti6 interface bound to device, passing MTU, check it
	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1300" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	fi

	# Move to another device with different MTU, without passing MTU, check
	# MTU is adjusted
	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "$((3000 - 40))" ]; then
		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	fi

	# Move it back, passing MTU, check MTU is not overridden
	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1280" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	fi

	return ${fail}
}
441
# Main: run every test listed in ${tests}, each in its own subshell, print
# one result line per test, and exit with 1 if any test failed.

# Make sure namespaces are removed however the script terminates
trap cleanup EXIT

exitcode=0
desc=0

# Split the test table on tabs and newlines only: names and descriptions then
# come out as alternating words, and spaces inside descriptions are kept.
# IFS is built with printf, rather than written as a literal, so that the tab
# can't be lost as invisible trailing whitespace. The trailing "x" protects
# the newline from being stripped by the command substitution.
IFS="$(printf '\t\nx')"
IFS="${IFS%x}"
for t in ${tests}; do
	# First word of each pair is the test name: keep it and go on to the
	# description
	if [ "${desc}" -eq 0 ]; then
		name="${t}"
		desc=1
		continue
	fi
	desc=0

	(
		# Test functions rely on default word splitting
		unset IFS
		eval test_${name}
		ret=$?
		cleanup

		# 0: passed, 1: failed, 2: skipped
		if [ $ret -eq 0 ]; then
			printf "TEST: %-60s  [ OK ]\n" "${t}"
		elif [ $ret -eq 1 ]; then
			printf "TEST: %-60s  [FAIL]\n" "${t}"
			err_flush
			exit 1
		elif [ $ret -eq 2 ]; then
			printf "TEST: %-60s  [SKIP]\n" "${t}"
			err_flush
		fi
	)
	# The subshell exits with non-zero status only for failed tests
	[ $? -ne 0 ] && exitcode=1
done

exit ${exitcode}
472