#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Check that route PMTU values match expectations, and that initial device MTU
# values are assigned correctly
#
# Tests currently implemented:
#
# - pmtu_vti4_exception
#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
#	namespaces with matching endpoints. Check that route exception is not
#	created if link layer MTU is not exceeded, then exceed it and check that
#	exception is created with the expected PMTU. The approach described
#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
#	changes alone won't affect PMTU
#
# - pmtu_vti6_exception
#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
#	namespaces with matching endpoints. Check that route exception is
#	created by exceeding link layer MTU with ping to other endpoint. Then
#	decrease and increase MTU of tunnel, checking that route exception PMTU
#	changes accordingly
#
# - pmtu_vti4_default_mtu
#	Set up vti4 tunnel on top of veth, in two namespaces with matching
#	endpoints. Check that MTU assigned to vti interface is the MTU of the
#	lower layer (veth) minus additional lower layer headers (zero, for veth)
#	minus IPv4 header length
#
# - pmtu_vti6_default_mtu
#	Same as above, for IPv6
#
# - pmtu_vti4_link_add_mtu
#	Set up vti4 interface passing MTU value at link creation, check MTU is
#	configured, and that link is not created with invalid MTU values
#
# - pmtu_vti6_link_add_mtu
#	Same as above, for IPv6
#
# - pmtu_vti6_link_change_mtu
#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
#	and check that configured MTU is used on link creation and changes, and
#	that MTU is properly calculated instead when MTU is not configured from
#	userspace
45
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Test table. The main loop reads the entries pairwise: first the test name
# (the function to run is "test_" followed by this name), then the
# description printed in the result line.
tests="
	pmtu_vti6_exception		vti6: PMTU exceptions
	pmtu_vti4_exception		vti4: PMTU exceptions
	pmtu_vti4_default_mtu		vti4: default MTU assignment
	pmtu_vti6_default_mtu		vti6: default MTU assignment
	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"

# Namespace names with a random suffix, and the command prefixes used to run
# a command inside each of them (expanded unquoted, so they must word-split)
NS_A="ns-$(mktemp -u XXXXXX)"
NS_B="ns-$(mktemp -u XXXXXX)"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"

# Addresses assigned to the two ends of the veth pair
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
veth6_mask="64"

# Addresses assigned to the vti/vti6 interfaces
vti4_a_addr="192.168.2.1"
vti4_b_addr="192.168.2.2"
vti4_mask="24"
vti6_a_addr="fd00:2::a"
vti6_b_addr="fd00:2::b"
vti6_mask="64"

# Addresses assigned to the dummy interfaces
dummy6_0_addr="fc00:1000::0"
dummy6_1_addr="fc00:1001::0"
dummy6_mask="64"

# cleanup() does nothing while this is 1; setup() resets it to 0
cleanup_done=1
# Error messages queued by err() until err_flush() prints them
err_buf=
83
# err() - Queue an error message for later output
# $1:	message text
#
# The message is appended to err_buf followed by a newline; nothing is
# printed until err_flush() is called.
err() {
	# The closing quote sits on the next line on purpose: the literal
	# newline before it terminates the queued message.
	err_buf="${err_buf}${1}
"
}
88
# err_flush() - Print all queued error messages, then empty the queue
#
# Writes err_buf to standard output exactly as stored and resets it.
err_flush() {
	# printf '%s' instead of 'echo -n': under /bin/sh, echo's handling of
	# -n and of backslashes in the data is implementation-defined.
	printf '%s' "${err_buf}"
	err_buf=
}
93
# setup_namespaces() - Create the two network namespaces NS_A and NS_B
#
# Return: 1 if the first namespace can't be created, otherwise the exit
# status of creating the second one.
setup_namespaces() {
	if ! ip netns add "${NS_A}"; then
		return 1
	fi
	ip netns add "${NS_B}"
}
98
# setup_veth() - Create a veth pair connecting the two namespaces
#
# veth_a stays in NS_A, veth_b is moved to NS_B. Both ends get their IPv4
# and IPv6 addresses and are brought up.
#
# Return: 1 if the veth pair can't be created, otherwise the exit status of
# the last command (bringing veth_b up).
setup_veth() {
	if ! ${ns_a} ip link add veth_a type veth peer name veth_b; then
		return 1
	fi
	${ns_a} ip link set veth_b netns "${NS_B}"

	# IPv4 addresses
	${ns_a} ip addr add "${veth4_a_addr}/${veth4_mask}" dev veth_a
	${ns_b} ip addr add "${veth4_b_addr}/${veth4_mask}" dev veth_b

	# IPv6 addresses
	${ns_a} ip addr add "${veth6_a_addr}/${veth6_mask}" dev veth_a
	${ns_b} ip addr add "${veth6_b_addr}/${veth6_mask}" dev veth_b

	${ns_a} ip link set veth_a up
	${ns_b} ip link set veth_b up
}
112
# setup_vti() - Create a pair of vti/vti6 interfaces, one per namespace
# $1:	IP version, 4 or 6 (selects link type "vti" or "vti6")
# $2:	tunnel endpoint address in namespace A
# $3:	tunnel endpoint address in namespace B
# $4:	address assigned to the vti interface in namespace A
# $5:	address assigned to the vti interface in namespace B
# $6:	prefix length for $4 and $5
#
# Interfaces are named vti<version>_a and vti<version>_b and use key 10.
#
# Return: 1 if the first interface can't be created.
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ "${proto}" -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	# Only the first creation is checked: it doubles as the "is this link
	# type supported" probe for setup()
	if ! ${ns_a} ip link add "vti${proto}_a" type "${vti_type}" local "${veth_a_addr}" remote "${veth_b_addr}" key 10; then
		return 1
	fi
	${ns_b} ip link add "vti${proto}_b" type "${vti_type}" local "${veth_b_addr}" remote "${veth_a_addr}" key 10

	${ns_a} ip addr add "${vti_a_addr}/${vti_mask}" dev "vti${proto}_a"
	${ns_b} ip addr add "${vti_b_addr}/${vti_mask}" dev "vti${proto}_b"

	${ns_a} ip link set "vti${proto}_a" up
	${ns_b} ip link set "vti${proto}_b" up

	# NOTE(review): presumably gives the new interfaces and addresses time
	# to become usable before the tests start - confirm
	sleep 1
}
134
# setup_vti4() - Create the vti (IPv4) tunnel pair on top of the veth
# IPv4 addresses, using the vti4 addresses and mask
setup_vti4() {
	setup_vti 4 \
		"${veth4_a_addr}" "${veth4_b_addr}" \
		"${vti4_a_addr}" "${vti4_b_addr}" \
		"${vti4_mask}"
}
138
# setup_vti6() - Create the vti6 tunnel pair on top of the veth IPv6
# addresses, using the vti6 addresses and mask
setup_vti6() {
	setup_vti 6 \
		"${veth6_a_addr}" "${veth6_b_addr}" \
		"${vti6_a_addr}" "${vti6_b_addr}" \
		"${vti6_mask}"
}
142
# setup_xfrm() - Install ESP tunnel-mode xfrm states and policies
# $1:	IP version, 4 or 6 (passed to ip as -4 / -6)
# $2:	tunnel endpoint address in namespace A
# $3:	tunnel endpoint address in namespace B
#
# Both namespaces get the same two states (SPI 0x1000 for A to B, SPI 0x1001
# for B to A), plus one "out" and one "in" policy with mark 10, with
# directions mirrored between the two namespaces.
#
# Return: 1 if the first state can't be added.
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	# AEAD algorithm and key material shared by all four states
	aead_alg="rfc4106(gcm(aes))"
	aead_key="0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f"

	# Namespace A. Only the very first command is checked: it doubles as
	# the "is xfrm supported" probe for setup()
	if ! ${ns_a} ip "-${proto}" xfrm state add \
		src "${veth_a_addr}" dst "${veth_b_addr}" spi 0x1000 \
		proto esp aead "${aead_alg}" "${aead_key}" 128 mode tunnel; then
		return 1
	fi
	${ns_a} ip "-${proto}" xfrm state add \
		src "${veth_b_addr}" dst "${veth_a_addr}" spi 0x1001 \
		proto esp aead "${aead_alg}" "${aead_key}" 128 mode tunnel
	${ns_a} ip "-${proto}" xfrm policy add dir out mark 10 \
		tmpl src "${veth_a_addr}" dst "${veth_b_addr}" proto esp mode tunnel
	${ns_a} ip "-${proto}" xfrm policy add dir in mark 10 \
		tmpl src "${veth_b_addr}" dst "${veth_a_addr}" proto esp mode tunnel

	# Namespace B: same states, policies with directions swapped
	${ns_b} ip "-${proto}" xfrm state add \
		src "${veth_a_addr}" dst "${veth_b_addr}" spi 0x1000 \
		proto esp aead "${aead_alg}" "${aead_key}" 128 mode tunnel
	${ns_b} ip "-${proto}" xfrm state add \
		src "${veth_b_addr}" dst "${veth_a_addr}" spi 0x1001 \
		proto esp aead "${aead_alg}" "${aead_key}" 128 mode tunnel
	${ns_b} ip "-${proto}" xfrm policy add dir out mark 10 \
		tmpl src "${veth_b_addr}" dst "${veth_a_addr}" proto esp mode tunnel
	${ns_b} ip "-${proto}" xfrm policy add dir in mark 10 \
		tmpl src "${veth_a_addr}" dst "${veth_b_addr}" proto esp mode tunnel
}
158
# setup_xfrm4() - Install xfrm states and policies between the veth IPv4
# addresses
setup_xfrm4() {
	setup_xfrm 4 "${veth4_a_addr}" "${veth4_b_addr}"
}
162
# setup_xfrm6() - Install xfrm states and policies between the veth IPv6
# addresses
setup_xfrm6() {
	setup_xfrm 6 "${veth6_a_addr}" "${veth6_b_addr}"
}
166
# setup() - Run the requested setup steps, in order
# $@:	step names; for each one, the function setup_<name> is called
#
# Marks cleanup as pending (cleanup_done=0) before running any step, so that
# a partial setup is still torn down by cleanup().
#
# Return: ${ksft_skip} if not running as root, 1 if a step fails (remaining
# steps are not run), 0 otherwise.
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo "  need to run as root"
		return "${ksft_skip}"
	fi

	cleanup_done=0
	for arg in "$@"; do
		# Plain indirect call: no need for eval to build the name
		if ! "setup_${arg}"; then
			echo "  ${arg} not supported"
			return 1
		fi
	done
}
175
# cleanup() - Delete both test namespaces, once
#
# Does nothing if cleanup_done is already 1. Errors from ip are discarded,
# as either namespace may not exist if setup failed early.
cleanup() {
	if [ "${cleanup_done}" -eq 1 ]; then
		return 0
	fi
	# "2>" must be a single token: with a space in between, "2" becomes
	# an extra argument to ip and stdout is redirected instead of stderr
	ip netns del "${NS_A}" 2> /dev/null
	ip netns del "${NS_B}" 2> /dev/null
	cleanup_done=1
}
182
# mtu() - Set the MTU of a network device
# $1:	command prefix selecting the namespace (e.g. "${ns_a}"), word-split
# $2:	device name
# $3:	MTU value
mtu() {
	# ${1} is intentionally unquoted: it holds a multi-word command prefix
	${1} ip link set dev "${2}" mtu "${3}"
}
190
# mtu_parse() - Extract the MTU value from ip link / ip route output
# $1:	text to scan
#
# Prints the word following the first occurrence of the word "mtu", or
# nothing if there is none.
mtu_parse() {
	input="${1}"

	next=0
	# Unquoted on purpose: split the text into whitespace-separated words
	for i in ${input}; do
		if [ "${next}" -eq 1 ]; then
			echo "${i}"
			return
		fi
		[ "${i}" = "mtu" ] && next=1
	done
}
200
# link_get() - Show link information for a device
# $1:	command prefix selecting the namespace (e.g. "${ns_a}"), word-split
# $2:	device name
#
# Prints the output of "ip link show dev <name>".
link_get() {
	# ${1} is intentionally unquoted: it holds a multi-word command prefix
	${1} ip link show dev "${2}"
}
207
# link_get_mtu() - Print the MTU of a device
# $1:	command prefix selecting the namespace (e.g. "${ns_a}")
# $2:	device name
#
# Prints the MTU found by mtu_parse() in the output of link_get(), or
# nothing if there is none.
link_get_mtu() {
	mtu_parse "$(link_get "${1}" "${2}")"
}
214
# route_get_dst_exception() - Look up the route towards a destination
# $1:	command prefix selecting the namespace (e.g. "${ns_a}"), word-split
# $2:	destination address
#
# Prints the output of "ip route get <dst>".
route_get_dst_exception() {
	# ${1} is intentionally unquoted: it holds a multi-word command prefix
	${1} ip route get "${2}"
}
221
# route_get_dst_pmtu_from_exception() - Print the PMTU for a destination
# $1:	command prefix selecting the namespace (e.g. "${ns_a}")
# $2:	destination address
#
# Prints the value mtu_parse() finds in the output of
# route_get_dst_exception(), or nothing if that output has no "mtu" word.
route_get_dst_pmtu_from_exception() {
	mtu_parse "$(route_get_dst_exception "${1}" "${2}")"
}
228
# test_pmtu_vti4_exception() - Check creation of vti4 PMTU route exceptions
#
# Pings the remote vti4 address with the largest payload that still fits the
# link layer MTU and expects no exception, then with one byte more and
# expects an exception carrying the computed ESP payload size as PMTU.
#
# Return: 0 on success, 1 on failure (details queued with err()), 2 if the
# setup failed and the test is skipped.
test_pmtu_vti4_exception() {
	setup namespaces veth vti4 xfrm4 || return 2

	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	#                                SPI   SN   IV  ICV   pad length   next header
	esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
	# NOTE(review): 28 is presumably IPv4 header (20) plus ICMP header (8)
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a "${veth_mtu}"
	mtu "${ns_b}" veth_b "${veth_mtu}"
	mtu "${ns_a}" vti4_a "${vti_mtu}"
	mtu "${ns_b}" vti4_b "${vti_mtu}"

	# Send DF packet without exceeding link layer MTU, check that no
	# exception is created
	${ns_a} ping -q -M want -i 0.1 -w 2 -s "${ping_payload}" "${vti4_b_addr}" > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti4_b_addr}")"
	if [ -n "${pmtu}" ]; then
		err "  unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
		return 1
	fi

	# Now exceed link layer MTU by one byte, check that exception is created
	${ns_a} ping -q -M want -i 0.1 -w 2 -s "$((ping_payload + 1))" "${vti4_b_addr}" > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti4_b_addr}")"
	if [ -z "${pmtu}" ]; then
		err "  exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
		return 1
	fi

	# ...with the right PMTU value
	if [ "${pmtu}" -ne "${esp_payload_rfc4106}" ]; then
		err "  wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
		return 1
	fi
}
267
# test_pmtu_vti6_exception() - Check vti6 PMTU route exceptions follow the
# tunnel MTU
#
# Creates a route exception by pinging through a tunnel whose MTU exceeds
# the one of the underlying veth, then lowers and raises the tunnel MTU and
# expects the PMTU of the exception to follow.
#
# Return: 0 on success, 1 on failure (details queued with err()), 2 if the
# setup failed and the test is skipped.
test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	fail=0

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 "${vti6_b_addr}" > /dev/null

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti6_b_addr}")"
	if [ -z "${pmtu}" ]; then
		err "  tunnel exceeding link layer MTU didn't create route exception"
		return 1
	fi

	# Decrease tunnel MTU, check for PMTU decrease in route exception.
	# Values are compared as strings: a numeric test on an empty PMTU
	# would be a syntax error and silently skip the failure branch.
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti6_b_addr}")"
	if [ "${pmtu}" != "3000" ]; then
		err "  decreasing tunnel MTU didn't decrease route exception PMTU"
		fail=1
	fi

	# Increase tunnel MTU, check for PMTU increase in route exception
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" "${vti6_b_addr}")"
	if [ "${pmtu}" != "9000" ]; then
		err "  increasing tunnel MTU didn't increase route exception PMTU"
		fail=1
	fi

	return ${fail}
}
302
# test_pmtu_vti4_default_mtu() - Check the default MTU of a vti4 interface
#
# Return: 0 if the vti4_a MTU is the veth_a MTU minus 20, 1 otherwise
# (details queued with err()), 2 if the setup failed and the test is
# skipped.
test_pmtu_vti4_default_mtu() {
	setup namespaces veth vti4 || return 2

	# Check that MTU of vti device is MTU of veth minus IPv4 header length
	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
	if [ "$((veth_mtu - vti4_mtu))" -eq 20 ]; then
		return 0
	fi

	err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
	return 1
}
314
# test_pmtu_vti6_default_mtu() - Check the default MTU of a vti6 interface
#
# Return: 0 if the vti6_a MTU is the veth_a MTU minus 40, 1 otherwise
# (details queued with err()), 2 if the setup failed and the test is
# skipped.
test_pmtu_vti6_default_mtu() {
	setup namespaces veth vti6 || return 2

	# Check that MTU of vti device is MTU of veth minus IPv6 header length
	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "$((veth_mtu - vti6_mtu))" -eq 40 ]; then
		return 0
	fi

	err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
	return 1
}
326
# test_pmtu_vti4_link_add_mtu() - Check MTU passed at vti4 link creation
#
# Values just outside [min, max] must either be rejected or adjusted into
# the range; values inside the range must be applied as given.
#
# Return: 0 on success, 1 on failure (details queued with err()), 2 if the
# setup failed or vti links can't be created, and the test is skipped.
test_pmtu_vti4_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti support first, so that lack of it is reported as skip
	if ! ${ns_a} ip link add vti4_a type vti local "${veth4_a_addr}" remote "${veth4_b_addr}" key 10; then
		err "  vti not supported"
		return 2
	fi
	${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65528 - 20))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# This can fail, or MTU can be adjusted to a proper value
		${ns_a} ip link add vti4_a mtu "${v}" type vti local "${veth4_a_addr}" remote "${veth4_b_addr}" key 10 2>/dev/null || continue
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		# An MTU that can't be read back counts as invalid, too
		if [ -z "${mtu}" ] || [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			err "  vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti4_a
	done

	# Now check valid values
	for v in "${min}" 1300 "${max}"; do
		${ns_a} ip link add vti4_a mtu "${v}" type vti local "${veth4_a_addr}" remote "${veth4_b_addr}" key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		${ns_a} ip link del vti4_a
		if [ "${mtu}" != "${v}" ]; then
			err "  vti MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
364
# test_pmtu_vti6_link_add_mtu() - Check MTU passed at vti6 link creation
#
# Values just outside [min, max] must either be rejected or adjusted into
# the range; values inside the range must be applied as given.
#
# Return: 0 on success, 1 on failure (details queued with err()), 2 if the
# setup failed or vti6 links can't be created, and the test is skipped.
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti6 support first, so that lack of it is reported as skip
	if ! ${ns_a} ip link add vti6_a type vti6 local "${veth6_a_addr}" remote "${veth6_b_addr}" key 10; then
		err "  vti6 not supported"
		return 2
	fi
	${ns_a} ip link del vti6_a

	fail=0

	min=68			# vti6 can carry IPv4 packets too
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# This can fail, or MTU can be adjusted to a proper value
		${ns_a} ip link add vti6_a mtu "${v}" type vti6 local "${veth6_a_addr}" remote "${veth6_b_addr}" key 10 2>/dev/null || continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		# An MTU that can't be read back counts as invalid, too
		if [ -z "${mtu}" ] || [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			# Report the MTU the link actually got, not the one
			# that was requested
			err "  vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti6_a
	done

	# Now check valid values
	for v in "${min}" 1280 1300 "${max}"; do
		${ns_a} ip link add vti6_a mtu "${v}" type vti6 local "${veth6_a_addr}" remote "${veth6_b_addr}" key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
402
# test_pmtu_vti6_link_change_mtu() - Check vti6 MTU across link changes
#
# Uses two dummy interfaces with MTU 1500 and 3000. A vti6 interface is
# created on the first one with an explicit MTU, moved to the second one
# without passing an MTU, then moved back with an explicit MTU; the
# resulting MTU is checked at each step.
#
# Return: 0 on success, 1 on failure (details queued with err()), 2 if the
# setup failed or dummy links can't be created, and the test is skipped.
test_pmtu_vti6_link_change_mtu() {
	setup namespaces || return 2

	if ! ${ns_a} ip link add dummy0 mtu 1500 type dummy; then
		err "  dummy not supported"
		return 2
	fi
	${ns_a} ip link add dummy1 mtu 3000 type dummy
	${ns_a} ip link set dummy0 up
	${ns_a} ip link set dummy1 up

	${ns_a} ip addr add "${dummy6_0_addr}/${dummy6_mask}" dev dummy0
	${ns_a} ip addr add "${dummy6_1_addr}/${dummy6_mask}" dev dummy1

	fail=0

	# MTU values below are compared as strings: if the MTU can't be read
	# (e.g. the link wasn't created), a numeric test on the empty value
	# would be a syntax error and the failure would go unnoticed.

	# Create vti6 interface bound to device, passing MTU, check it
	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote "${dummy6_0_addr}" local "${dummy6_0_addr}"
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1300" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	fi

	# Move to another device with different MTU, without passing MTU, check
	# MTU is adjusted
	${ns_a} ip link set vti6_a type vti6 remote "${dummy6_1_addr}" local "${dummy6_1_addr}"
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "$((3000 - 40))" ]; then
		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	fi

	# Move it back, passing MTU, check MTU is not overridden
	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote "${dummy6_0_addr}" local "${dummy6_0_addr}"
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1280" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	fi

	return ${fail}
}
444
# Make sure namespaces are removed however the script terminates
trap cleanup EXIT

exitcode=0
desc=0
# Split the test table on tabs and newlines only, so that descriptions
# containing spaces stay in one piece. The separator is built with printf
# rather than typed literally, so it doesn't depend on invisible trailing
# whitespace; the "_" keeps command substitution from stripping the newline.
IFS="$(printf '\t\n_')"
IFS="${IFS%_}"
for t in ${tests}; do
	# Entries alternate: test name first, then its description
	if [ "${desc}" -eq 0 ]; then
		name="${t}"
		desc=1
		continue
	fi
	desc=0

	# Each test runs in its own subshell, with default field splitting
	# restored, so that variables it sets don't leak into the next one
	(
		unset IFS
		"test_${name}"
		ret=$?
		cleanup

		if [ "${ret}" -eq 0 ]; then
			printf "TEST: %-60s  [ OK ]\n" "${t}"
		elif [ "${ret}" -eq 2 ]; then
			printf "TEST: %-60s  [SKIP]\n" "${t}"
			err_flush
		else
			# 1 is the regular failure code; anything unexpected
			# is reported as a failure too, instead of silently
			# printing nothing
			printf "TEST: %-60s  [FAIL]\n" "${t}"
			err_flush
			exit 1
		fi
	)
	if [ $? -ne 0 ]; then
		exitcode=1
	fi
done

exit ${exitcode}
475