xref: /openbmc/linux/tools/testing/selftests/net/pmtu.sh (revision 4f727ecefefbd180de10e25b3e74c03dce3f1e75)
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
4# Check that route PMTU values match expectations, and that initial device MTU
5# values are assigned correctly
6#
7# Tests currently implemented:
8#
# - pmtu_ipv4_exception
10#	Set up two namespaces, A and B, with two paths between them over routers
11#	R1 and R2 (also implemented with namespaces), with different MTUs:
12#
13#	  segment a_r1    segment b_r1		a_r1: 2000
14#	.--------------R1--------------.	a_r2: 1500
15#	A                               B	a_r3: 2000
16#	'--------------R2--------------'	a_r4: 1400
17#	  segment a_r2    segment b_r2
18#
19#	Check that PMTU exceptions with the correct PMTU are created. Then
20#	decrease and increase the MTU of the local link for one of the paths,
21#	A to R1, checking that route exception PMTU changes accordingly over
22#	this path. Also check that locked exceptions are created when an ICMP
23#	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
24#	received
25#
# - pmtu_ipv6_exception
27#	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
28#
29# - pmtu_ipv4_vxlan4_exception
30#	Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
31#	over IPv4 between A and B, routed via R1. On the link between R1 and B,
32#	set a MTU lower than the VXLAN MTU and the MTU on the link between A and
33#	R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN
34#	from A to B and check that the PMTU exception is created with the right
35#	value on A
36#
37# - pmtu_ipv6_vxlan4_exception
38#	Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
39#
40# - pmtu_ipv4_vxlan6_exception
41#	Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
42#
43# - pmtu_ipv6_vxlan6_exception
44#	Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
45#
46# - pmtu_ipv4_geneve4_exception
47#	Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
48#	VXLAN
49#
50# - pmtu_ipv6_geneve4_exception
51#	Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
52#	VXLAN
53#
54# - pmtu_ipv4_geneve6_exception
55#	Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
56#	VXLAN
57#
58# - pmtu_ipv6_geneve6_exception
59#	Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
60#	VXLAN
61#
62# - pmtu_ipv{4,6}_fou{4,6}_exception
63#	Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
64#	(FoU) over IPv4/IPv6, instead of VXLAN
65#
# - pmtu_ipv{4,6}_gue{4,6}_exception
67#	Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
68#	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
69#
70# - pmtu_vti4_exception
71#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
72#	namespaces with matching endpoints. Check that route exception is not
73#	created if link layer MTU is not exceeded, then exceed it and check that
74#	exception is created with the expected PMTU. The approach described
75#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
76#	changes alone won't affect PMTU
77#
78# - pmtu_vti6_exception
79#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
80#	namespaces with matching endpoints. Check that route exception is
81#	created by exceeding link layer MTU with ping to other endpoint. Then
82#	decrease and increase MTU of tunnel, checking that route exception PMTU
83#	changes accordingly
84#
85# - pmtu_vti4_default_mtu
86#	Set up vti4 tunnel on top of veth, in two namespaces with matching
87#	endpoints. Check that MTU assigned to vti interface is the MTU of the
88#	lower layer (veth) minus additional lower layer headers (zero, for veth)
89#	minus IPv4 header length
90#
91# - pmtu_vti6_default_mtu
92#	Same as above, for IPv6
93#
94# - pmtu_vti4_link_add_mtu
95#	Set up vti4 interface passing MTU value at link creation, check MTU is
96#	configured, and that link is not created with invalid MTU values
97#
98# - pmtu_vti6_link_add_mtu
99#	Same as above, for IPv6
100#
101# - pmtu_vti6_link_change_mtu
102#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
103#	and check that configured MTU is used on link creation and changes, and
104#	that MTU is properly calculated instead when MTU is not configured from
105#	userspace
106#
107# - cleanup_ipv4_exception
108#	Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
109#	exceptions on multiple CPUs and check that the veth device tear-down
110# 	happens in a timely manner
111#
112# - cleanup_ipv6_exception
113#	Same as above, but use IPv6 transport from A to B
114
115
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Run-time knobs, all disabled by default
PAUSE_ON_FAIL=no
VERBOSE=0
TRACING=0

# Some systems don't have a ping6 binary anymore: fall back to plain ping
# there. The lookup uses the POSIX 'command -v' rather than 'which', which is
# not guaranteed to be installed
if command -v ping6 > /dev/null 2>&1; then
	ping6=$(command -v ping6)
else
	ping6=$(command -v ping)
fi
125
126tests="
127	pmtu_ipv4_exception		ipv4: PMTU exceptions
128	pmtu_ipv6_exception		ipv6: PMTU exceptions
129	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions
130	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions
131	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions
132	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions
133	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions
134	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions
135	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions
136	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions
137	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions
138	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions
139	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions
140	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions
141	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions
142	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions
143	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions
144	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions
145	pmtu_vti6_exception		vti6: PMTU exceptions
146	pmtu_vti4_exception		vti4: PMTU exceptions
147	pmtu_vti4_default_mtu		vti4: default MTU assignment
148	pmtu_vti6_default_mtu		vti6: default MTU assignment
149	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
150	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
151	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes
152	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions
153	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions"
154
155NS_A="ns-$(mktemp -u XXXXXX)"
156NS_B="ns-$(mktemp -u XXXXXX)"
157NS_R1="ns-$(mktemp -u XXXXXX)"
158NS_R2="ns-$(mktemp -u XXXXXX)"
159ns_a="ip netns exec ${NS_A}"
160ns_b="ip netns exec ${NS_B}"
161ns_r1="ip netns exec ${NS_R1}"
162ns_r2="ip netns exec ${NS_R2}"
163
# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
# Addresses are:
# - IPv4: PREFIX4.SEGMENT.ID (/24)
# - IPv6: PREFIX6:SEGMENT::ID (/64)
prefix4="10.0"
prefix6="fc00"
# Segment indices, named after the host and router each segment connects
a_r1=1
a_r2=2
b_r1=3
b_r2=4
# Whitespace-separated triplets, read three tokens at a time by setup_routing()
#	ns	peer	segment
routing_addrs="
	A	R1	${a_r1}
	A	R2	${a_r2}
	B	R1	${b_r1}
	B	R2	${b_r2}
"
# Traffic from A to B goes through R1 by default, and through R2, if destined to
# B's address on the b_r2 segment.
# Traffic from B to A goes through R1.
# Whitespace-separated triplets as well, IPv4 routes first, then IPv6 ones
#	ns	destination		gateway
routes="
	A	default			${prefix4}.${a_r1}.2
	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
	B	default			${prefix4}.${b_r1}.2

	A	default			${prefix6}:${a_r1}::2
	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
	B	default			${prefix6}:${b_r1}::2
"
196
197veth4_a_addr="192.168.1.1"
198veth4_b_addr="192.168.1.2"
199veth4_mask="24"
200veth6_a_addr="fd00:1::a"
201veth6_b_addr="fd00:1::b"
202veth6_mask="64"
203
204tunnel4_a_addr="192.168.2.1"
205tunnel4_b_addr="192.168.2.2"
206tunnel4_mask="24"
207tunnel6_a_addr="fd00:2::a"
208tunnel6_b_addr="fd00:2::b"
209tunnel6_mask="64"
210
211dummy6_0_prefix="fc00:1000::"
212dummy6_1_prefix="fc00:1001::"
213dummy6_mask="64"
214
215cleanup_done=1
216err_buf=
217tcpdump_pids=
218
# Queue an error message: ${1} is appended to err_buf, followed by a newline
err() {
	err_buf="${err_buf}${1}"'
'
}
223
# Print all the error messages queued in err_buf, then empty the buffer
err_flush() {
	# printf instead of 'echo -n': with /bin/sh, handling of -n and of
	# backslashes by echo depends on the implementation
	printf '%s' "${err_buf}"
	err_buf=
}
228
# Run a command, capturing its output (stdout and stderr together)
#
# The command line is given as arguments. It's joined into a single string and
# split into words again on execution, so arguments can't contain whitespace
# that needs to be preserved.
#
# With VERBOSE=1, the command line and any output are printed. The joined
# command line, output and exit status are left in cmd, out and rc.
#
# Returns the exit status of the command
run_cmd() {
	cmd="$*"

	if [ "$VERBOSE" = "1" ]; then
		# The command line is data, not a format: a '%' in it must be
		# printed as it is
		printf '    COMMAND: %s\n' "$cmd"
	fi

	out="$($cmd 2>&1)"
	rc=$?
	if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
		echo "    $out"
		echo
	fi

	return $rc
}
245
# Print the auto-generated namespace name for a short identifier: ${1} (e.g.
# A, R1) selects the variable NS_${1}
nsname() {
	eval "echo \${NS_${1}}"
}
250
# Set up a FoU or GUE tunnel between namespaces A and B, using their addresses
# on the a_r1 and b_r1 segments as tunnel endpoints
#
# $1: outer (transport) IP version, 4 or 6
# $2: inner (tunneled) IP version, 4 or 6
# $3: encapsulation, fou or gue
#
# Returns 2 if the fou/fou6 module can't be loaded, or if the receive port or
# the tunnel link can't be created in namespace A
setup_fou_or_gue() {
	outer="${1}"
	inner="${2}"
	encap="${3}"

	# Only ip6tnl links take a mode. Clear it first, so that a value left
	# over from a previous call doesn't end up on an ipip or sit link
	mode=""

	if [ "${outer}" = "4" ]; then
		modprobe fou || return 2
		a_addr="${prefix4}.${a_r1}.1"
		b_addr="${prefix4}.${b_r1}.1"
		if [ "${inner}" = "4" ]; then
			type="ipip"
			ipproto="4"
		else
			type="sit"
			ipproto="41"
		fi
	else
		modprobe fou6 || return 2
		a_addr="${prefix6}:${a_r1}::1"
		b_addr="${prefix6}:${b_r1}::1"
		if [ "${inner}" = "4" ]; then
			type="ip6tnl"
			mode="mode ipip6"
			ipproto="4 -6"
		else
			type="ip6tnl"
			mode="mode ip6ip6"
			ipproto="41 -6"
		fi
	fi

	# A receives on port 5555 and sends to 5556, B the other way around
	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2

	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555

	if [ "${inner}" = "4" ]; then
		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
	else
		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
	fi

	run_cmd ${ns_a} ip link set ${encap}_a up
	run_cmd ${ns_b} ip link set ${encap}_b up
}
299
# FoU and GUE setup entry points: setup_<encap><outer><inner>, where outer and
# inner are the IP versions of transport and tunneled traffic

# FoU, IPv4 over IPv4
setup_fou44() {
	setup_fou_or_gue "4" "4" "fou"
}

# FoU, IPv6 over IPv4
setup_fou46() {
	setup_fou_or_gue "4" "6" "fou"
}

# FoU, IPv4 over IPv6
setup_fou64() {
	setup_fou_or_gue "6" "4" "fou"
}

# FoU, IPv6 over IPv6
setup_fou66() {
	setup_fou_or_gue "6" "6" "fou"
}

# GUE, IPv4 over IPv4
setup_gue44() {
	setup_fou_or_gue "4" "4" "gue"
}

# GUE, IPv6 over IPv4
setup_gue46() {
	setup_fou_or_gue "4" "6" "gue"
}

# GUE, IPv4 over IPv6
setup_gue64() {
	setup_fou_or_gue "6" "4" "gue"
}

# GUE, IPv6 over IPv6
setup_gue66() {
	setup_fou_or_gue "6" "6" "gue"
}
331
# Create the four namespaces (A, B, R1, R2). Returns 1 as soon as one of them
# can't be created
setup_namespaces() {
	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		if ! ip netns add ${n}; then
			return 1
		fi

		# With DAD disabled, configured IPv6 addresses can be used
		# right away, without waiting
		ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
	done
}
341
# Connect namespaces A and B directly with a veth pair (veth_a, veth_b),
# assign IPv4 and IPv6 addresses to both ends and bring them up. Returns 1 if
# the pair can't be created
setup_veth() {
	# Both ends are created in A, then veth_b is moved to B
	if ! run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b; then
		return 1
	fi
	run_cmd ${ns_a} ip link set veth_b netns ${NS_B}

	# IPv4 addresses
	run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
	run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b

	# IPv6 addresses
	run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
	run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b

	run_cmd ${ns_a} ip link set veth_a up
	run_cmd ${ns_b} ip link set veth_b up
}
355
# Set up a vti (IPv4) or vti6 (IPv6) tunnel between namespaces A and B
#
# $1: IP version, 4 or 6
# $2, $3: tunnel endpoint addresses in A and B
# $4, $5: addresses for the tunnel interfaces in A and B
# $6: prefix length for the tunnel interface addresses
#
# Returns 1 if the tunnel link can't be created in namespace A
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	if ! run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10; then
		return 1
	fi
	run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10

	run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
	run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b

	run_cmd ${ns_a} ip link set vti${proto}_a up
	run_cmd ${ns_b} ip link set vti${proto}_b up
}
375
# vti tunnel over the IPv4 addresses of the direct veth link
setup_vti4() {
	setup_vti 4 \
		  ${veth4_a_addr} ${veth4_b_addr} \
		  ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
}

# vti6 tunnel over the IPv6 addresses of the direct veth link
setup_vti6() {
	setup_vti 6 \
		  ${veth6_a_addr} ${veth6_b_addr} \
		  ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
}
383
# Set up a VXLAN or GENEVE tunnel between namespaces A and B, with both IPv4
# and IPv6 addresses on the tunnel interfaces
#
# $1: tunnel type, vxlan or geneve
# $2, $3: tunnel endpoint addresses in A and B
# $4: additional options for link creation, optional
#
# Returns 1 if the tunnel link can't be created in namespace A
setup_vxlan_or_geneve() {
	type="${1}"
	a_addr="${2}"
	b_addr="${3}"
	opts="${4}"

	case "${type}" in
	vxlan)
		# VXLAN links get explicit TTL and destination port, and the
		# local endpoint address
		opts="${opts} ttl 64 dstport 4789"
		opts_a="local ${a_addr}"
		opts_b="local ${b_addr}"
		;;
	*)
		opts_a=""
		opts_b=""
		;;
	esac

	if ! run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts}; then
		return 1
	fi
	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}

	run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b

	run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b

	run_cmd ${ns_a} ip link set ${type}_a up
	run_cmd ${ns_b} ip link set ${type}_b up
}
411
# VXLAN and GENEVE setup entry points. Tunnel endpoints are the addresses of A
# and B on the a_r1 and b_r1 segments; "df set" is passed for IPv4 transport
# only

# GENEVE over IPv4
setup_geneve4() {
	setup_vxlan_or_geneve "geneve" "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set"
}

# VXLAN over IPv4
setup_vxlan4() {
	setup_vxlan_or_geneve "vxlan" "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set"
}

# GENEVE over IPv6
setup_geneve6() {
	setup_vxlan_or_geneve "geneve" "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1"
}

# VXLAN over IPv6
setup_vxlan6() {
	setup_vxlan_or_geneve "vxlan" "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1"
}
427
# Add one ESP tunnel mode state, using ${proto} as set by setup_xfrm()
#
# $1: command prefix selecting the namespace
# $2, $3: source and destination addresses
# $4: SPI
xfrm_add_state() {
	run_cmd ${1} ip -${proto} xfrm state add src ${2} dst ${3} spi ${4} proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
}

# Add one policy with mark 10 and an ESP tunnel mode template, using ${proto}
# as set by setup_xfrm()
#
# $1: command prefix selecting the namespace
# $2: direction, in or out
# $3, $4: template source and destination addresses
xfrm_add_policy() {
	run_cmd ${1} ip -${proto} xfrm policy add dir ${2} mark 10 tmpl src ${3} dst ${4} proto esp mode tunnel
}

# Configure xfrm states and policies in namespaces A and B, for both
# directions between the given endpoints
#
# $1: IP version, 4 or 6
# $2, $3: endpoint addresses in A and B
#
# Returns 1 if the first state can't be added in namespace A
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	xfrm_add_state  "${ns_a}" "${veth_a_addr}" "${veth_b_addr}" 0x1000 || return 1
	xfrm_add_state  "${ns_a}" "${veth_b_addr}" "${veth_a_addr}" 0x1001
	xfrm_add_policy "${ns_a}" out "${veth_a_addr}" "${veth_b_addr}"
	xfrm_add_policy "${ns_a}" in  "${veth_b_addr}" "${veth_a_addr}"

	xfrm_add_state  "${ns_b}" "${veth_a_addr}" "${veth_b_addr}" 0x1000
	xfrm_add_state  "${ns_b}" "${veth_b_addr}" "${veth_a_addr}" 0x1001
	xfrm_add_policy "${ns_b}" out "${veth_b_addr}" "${veth_a_addr}"
	xfrm_add_policy "${ns_b}" in  "${veth_a_addr}" "${veth_b_addr}"
}
443
# xfrm configuration between the IPv4 addresses of the direct veth link
setup_xfrm4() {
	setup_xfrm "4" ${veth4_a_addr} ${veth4_b_addr}
}

# xfrm configuration between the IPv6 addresses of the direct veth link
setup_xfrm6() {
	setup_xfrm "6" ${veth6_a_addr} ${veth6_b_addr}
}
451
# Build the routed topology: enable forwarding on R1 and R2, create and address
# the veth links listed in routing_addrs, then install the routes listed in
# routes
#
# Returns 1 if a veth link can't be created. Failures to assign addresses or to
# add routes are not checked
setup_routing() {
	for i in ${NS_R1} ${NS_R2}; do
		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
	done

	# routing_addrs is a flat list of (ns, peer, segment) triplets: empty
	# variables tell which field comes next, so start with all of them
	# empty, regardless of what earlier code left there
	ns=""; peer=""; segment=""
	for i in ${routing_addrs}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
		[ "${segment}" = "" ]	&& segment="${i}"

		ns_name="$(nsname ${ns})"
		peer_name="$(nsname ${peer})"
		if="veth_${ns}-${peer}"
		ifpeer="veth_${peer}-${ns}"

		# Create veth links
		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
		ip -n ${peer_name} link set dev ${ifpeer} up

		# Add addresses
		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}

		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}

		ns=""; peer=""; segment=""
	done

	# Same approach for routes, a flat list of (ns, destination, gateway)
	# triplets
	ns=""; addr=""; gw=""
	for i in ${routes}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
		[ "${gw}" = "" ]	&& gw="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} route add ${addr} via ${gw}

		ns=""; addr=""; gw=""
	done
}
494
# Run the setup steps given as arguments, in order: each argument <name> calls
# setup_<name>
#
# Returns ksft_skip if not running as root, 1 (after printing a message) as
# soon as one step fails
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo "  need to run as root"
		return $ksft_skip
	fi

	# From now on, there might be something to tear down
	cleanup_done=0
	for arg do
		if ! eval setup_${arg}; then
			echo "  ${arg} not supported"
			return 1
		fi
	done
}
503
# If TRACING is enabled, start tcpdump captures in the background
#
# Arguments are (namespace command prefix, interface) pairs. Each capture is
# written to ${name}_<interface>.pcap, and the tcpdump PIDs are appended to
# tcpdump_pids
trace() {
	[ $TRACING -eq 0 ] && return

	# An empty ns_cmd means that the next argument starts a new pair.
	# Helpers such as mtu() and link_get() leave ns_cmd set: clear it, or
	# the first prefix would be taken for an interface name
	ns_cmd=
	for arg do
		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
		tcpdump_pids="${tcpdump_pids} $!"
		ns_cmd=
	done
	# Presumably gives the captures some time to start before traffic is
	# generated
	sleep 1
}
515
# Stop the tcpdump processes recorded in tcpdump_pids, then delete the test
# namespaces, unless cleanup_done says there's nothing left to delete
cleanup() {
	for pid in ${tcpdump_pids}; do
		kill ${pid}
	done
	tcpdump_pids=

	if [ ${cleanup_done} -eq 1 ]; then
		return 0
	fi

	# Errors are discarded: some namespaces might not exist
	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		ip netns del ${n} 2> /dev/null
	done
	cleanup_done=1
}
528
# Set the MTU of a device
#
# $1: command prefix selecting the namespace
# $2: device name
# $3: MTU value
mtu() {
	ns_cmd="${1}" dev="${2}" mtu="${3}"

	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
}
536
# Extract the MTU from 'ip link show' or 'ip route get' output
#
# $1: output to parse
#
# Prints the word following the first "mtu" keyword, or "lock <value>" if that
# word is "lock". Prints nothing if no "mtu" keyword is found
mtu_parse() {
	input="${1}"

	# next: 0 while looking for "mtu", 1 right after "mtu", 2 right after
	# "mtu lock". A case statement avoids combining conditions with the
	# obsolescent -a operator of test, which is ambiguous with this many
	# operands
	next=0
	for i in ${input}; do
		case "${next}" in
		1)
			if [ "${i}" = "lock" ]; then
				next=2
				continue
			fi
			echo "${i}"
			return
			;;
		2)
			echo "lock ${i}"
			return
			;;
		esac
		[ "${i}" = "mtu" ] && next=1
	done
}
548
# Print 'ip link show' output for a device
#
# $1: command prefix selecting the namespace
# $2: device name
link_get() {
	ns_cmd="${1}" name="${2}"

	${ns_cmd} ip link show dev "${name}"
}
555
# Print the MTU of a device, as parsed from link_get() output
#
# $1: command prefix selecting the namespace
# $2: device name
link_get_mtu() {
	ns_cmd="${1}" name="${2}"

	mtu_parse "$(link_get "${ns_cmd}" "${name}")"
}
562
# Print 'ip route get' output for a destination
#
# $1: command prefix selecting the namespace
# $2: destination address
route_get_dst_exception() {
	ns_cmd="${1}" dst="${2}"

	${ns_cmd} ip route get "${dst}"
}
569
# Print the PMTU for a destination, as parsed from route_get_dst_exception()
# output. Nothing is printed if the output has no mtu field
#
# $1: command prefix selecting the namespace
# $2: destination address
route_get_dst_pmtu_from_exception() {
	ns_cmd="${1}" dst="${2}"

	mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}")"
}
576
# Compare a PMTU value against the expected one, queueing an error message on
# mismatch
#
# $1: expected value: "any" accepts any non-empty value, an empty string means
#     that no exception should exist
# $2: value found
# $3: description of the event that led to this check, used in messages
#
# Returns 0 on match, 1 otherwise
check_pmtu_value() {
	expected="${1}"
	value="${2}"
	event="${3}"

	if [ "${expected}" = "any" ] && [ -n "${value}" ]; then
		return 0
	fi
	if [ "${value}" = "${expected}" ]; then
		return 0
	fi

	if [ -z "${value}" ]; then
		err "  PMTU exception wasn't created after ${event}"
		return 1
	fi
	if [ -z "${expected}" ]; then
		err "  PMTU exception shouldn't exist after ${event}"
		return 1
	fi

	err "  found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
	return 1
}
589
# Check PMTU exceptions on the routed topology, for paths via R1 and via R2
#
# $1: IP version of test traffic, 4 or 6
#
# Returns 2 if setup fails, 1 at the first failed check, 0 if all checks pass.
# Locked PMTU checks are done for IPv4 only
test_pmtu_ipvX() {
	family=${1}

	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	# dst1 is reached via R1, dst2 via R2
	if [ ${family} -eq 4 ]; then
		ping=ping
		dst1="${prefix4}.${b_r1}.1"
		dst2="${prefix4}.${b_r2}.1"
	else
		ping=${ping6}
		dst1="${prefix6}:${b_r1}::1"
		dst2="${prefix6}:${b_r2}::1"
	fi

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}"  veth_B-R1 1400

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	# Create route exceptions
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}

	# Check that exceptions have been created with the correct PMTU
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

	# Decrease local MTU below PMTU, check for PMTU decrease in route exception
	mtu "${ns_a}"  veth_A-R1 1300
	mtu "${ns_r1}" veth_R1-A 1300
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Increase MTU, check for PMTU increase in route exception
	mtu "${ns_a}"  veth_A-R1 1700
	mtu "${ns_r1}" veth_R1-A 1700
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Skip PMTU locking tests for IPv6
	[ $family -eq 6 ] && return 0

	# Decrease remote MTU on path via R2, get new exception
	# NOTE(review): 552 is presumably the default value of
	# net.ipv4.route.min_pmtu -- confirm
	mtu "${ns_r2}" veth_R2-B 400
	mtu "${ns_b}"  veth_B-R2 400
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1

	# Decrease local MTU below PMTU
	mtu "${ns_a}"  veth_A-R2 500
	mtu "${ns_r2}" veth_R2-A 500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1

	# Increase local MTU
	mtu "${ns_a}"  veth_A-R2 1500
	mtu "${ns_r2}" veth_R2-A 1500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1

	# Get new exception
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}
675
# PMTU exceptions on the routed topology, IPv4 traffic
test_pmtu_ipv4_exception() {
	test_pmtu_ipvX "4"
}

# PMTU exceptions on the routed topology, IPv6 traffic
test_pmtu_ipv6_exception() {
	test_pmtu_ipvX "6"
}
683
# Check that a PMTU exception with the expected value is created on A, for
# traffic sent over a VXLAN or GENEVE tunnel routed via R1
#
# $1: tunnel type, vxlan or geneve
# $2: IP version of tunneled traffic, 4 or 6
# $3: IP version of tunnel transport, 4 or 6
#
# Returns 2 if setup fails, otherwise the result of the PMTU check
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
	type=${1}
	family=${2}
	outer_family=${3}
	ll_mtu=4000

	# Expected PMTU is the link layer MTU between R1 and B, minus the
	# encapsulation overhead
	if [ ${outer_family} -eq 4 ]; then
		setup namespaces routing ${type}4 || return 2
		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
	else
		setup namespaces routing ${type}6 || return 2
		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
	fi

	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
}
726
# VXLAN and GENEVE test entry points: test_pmtu_ipv<inner>_<type><outer>_exception,
# where inner and outer are the IP versions of tunneled traffic and transport

# IPv4 over VXLAN over IPv4
test_pmtu_ipv4_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "vxlan" "4" "4"
}

# IPv6 over VXLAN over IPv4
test_pmtu_ipv6_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "vxlan" "6" "4"
}

# IPv4 over GENEVE over IPv4
test_pmtu_ipv4_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "geneve" "4" "4"
}

# IPv6 over GENEVE over IPv4
test_pmtu_ipv6_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "geneve" "6" "4"
}

# IPv4 over VXLAN over IPv6
test_pmtu_ipv4_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "vxlan" "4" "6"
}

# IPv6 over VXLAN over IPv6
test_pmtu_ipv6_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "vxlan" "6" "6"
}

# IPv4 over GENEVE over IPv6
test_pmtu_ipv4_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "geneve" "4" "6"
}

# IPv6 over GENEVE over IPv6
test_pmtu_ipv6_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception "geneve" "6" "6"
}
758
# Check that a PMTU exception with the expected value is created on A, for
# traffic sent over a FoU or GUE tunnel routed via R1
#
# $1: IP version of tunneled traffic, 4 or 6
# $2: IP version of tunnel transport, 4 or 6
# $3: encapsulation, fou or gue
#
# Returns 2 if setup fails, otherwise the result of the PMTU check
test_pmtu_ipvX_over_fouY_or_gueY() {
	inner_family=${1}
	outer_family=${2}
	encap=${3}
	ll_mtu=4000

	setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	if [ ${inner_family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# GUE adds four bytes of overhead on top of UDP, FoU adds none
	if [ "${encap}" = "gue" ]; then
		encap_overhead=4
	else
		encap_overhead=0
	fi

	# Expected PMTU is the link layer MTU between R1 and B, minus the
	# encapsulation overhead
	if [ ${outer_family} -eq 4 ]; then
		#                      IPv4 header   UDP header
		exp_mtu=$((${ll_mtu} - 20          - 8         - ${encap_overhead}))
	else
		#                      IPv6 header   Option 4   UDP header
		exp_mtu=$((${ll_mtu} - 40          - 8        - 8       - ${encap_overhead}))
	fi

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
}
806
# FoU and GUE test entry points: test_pmtu_ipv<inner>_<encap><outer>_exception,
# where inner and outer are the IP versions of tunneled traffic and transport

# IPv4 over FoU over IPv4
test_pmtu_ipv4_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "4" "4" "fou"
}

# IPv6 over FoU over IPv4
test_pmtu_ipv6_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "6" "4" "fou"
}

# IPv4 over FoU over IPv6
test_pmtu_ipv4_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "4" "6" "fou"
}

# IPv6 over FoU over IPv6
test_pmtu_ipv6_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "6" "6" "fou"
}

# IPv4 over GUE over IPv4
test_pmtu_ipv4_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "4" "4" "gue"
}

# IPv6 over GUE over IPv4
test_pmtu_ipv6_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "6" "4" "gue"
}

# IPv4 over GUE over IPv6
test_pmtu_ipv4_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "4" "6" "gue"
}

# IPv6 over GUE over IPv6
test_pmtu_ipv6_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY "6" "6" "gue"
}
838
# Check that no PMTU exception is created on a vti tunnel for a packet that
# fits the link layer MTU, and that one with the expected value is created once
# the packet is one byte bigger
#
# Returns 2 if setup fails, 1 if the first check fails, otherwise the result of
# the second check
test_pmtu_vti4_exception() {
	setup namespaces veth vti4 xfrm4 || return 2
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti4_a    "${ns_b}" vti4_b

	# The vti MTU is the veth MTU minus the size of an IPv4 header
	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	#                                SPI   SN   IV  ICV   pad length   next header
	esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
	# NOTE(review): 28 is presumably IPv4 header (20) plus ICMP header (8)
	# of the inner packet -- confirm
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# Send DF packet without exceeding link layer MTU, check that no
	# exception is created
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1

	# Now exceed link layer MTU by one byte, check that exception is created
	# with the right PMTU value
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
868
# Check that a PMTU exception is created on a vti6 tunnel by exceeding the link
# layer MTU, and that it follows decreases and increases of the tunnel MTU
#
# Returns 2 if setup fails, 1 if no exception is created or if any of the two
# following checks fails, 0 otherwise
test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
	fail=0

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1

	# From here on, a failed check is recorded and the test goes on

	# Decrease tunnel MTU, check for PMTU decrease in route exception
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1

	# Increase tunnel MTU, check for PMTU increase in route exception
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1

	return ${fail}
}
898
# Check that the default MTU of a vti interface is the MTU of the underlying
# veth minus the IPv4 header length
#
# Returns 2 if setup fails, 1 on mismatch, 0 otherwise
test_pmtu_vti4_default_mtu() {
	setup namespaces veth vti4 || return 2

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"

	# The difference must be exactly the size of an IPv4 header
	[ $((veth_mtu - vti4_mtu)) -ne 20 ] || return 0

	err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
	return 1
}
910
# Check that the default MTU of a vti6 interface is the MTU of the underlying
# veth minus the IPv6 header length
#
# Returns 2 if setup fails, 1 on mismatch, 0 otherwise
test_pmtu_vti6_default_mtu() {
	setup namespaces veth vti6 || return 2

	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"

	# The difference must be exactly the size of an IPv6 header
	[ $((veth_mtu - vti6_mtu)) -ne 40 ] || return 0

	err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
	return 1
}
922
# Check MTU handling on vti link creation: invalid values must be rejected or
# adjusted to valid ones, valid values must be configured as given
#
# Returns 2 if setup fails or vti links can't be created at all, 1 if any check
# fails, 0 otherwise
test_pmtu_vti4_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti support first
	if ! run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10; then
		err "  vti not supported"
		return 2
	fi
	run_cmd ${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65535 - 20))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# This can fail, or MTU can be adjusted to a proper value
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 || continue

		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ ${mtu} -lt ${min} ] || [ ${mtu} -gt ${max} ]; then
			err "  vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti4_a
	done

	# Now check valid values
	for v in ${min} 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		run_cmd ${ns_a} ip link del vti4_a

		if [ "${mtu}" != "${v}" ]; then
			err "  vti MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
960
# Check MTU handling when a vti6 device is created with an explicit MTU.
#
# A first device is created and deleted just to probe for vti6 support. Then:
# - out-of-range values (min - 1, max + 1) must either be rejected, or result
#   in a device whose MTU lies within [min, max]
# - in-range values must be applied unchanged
#
# Returns: 0 on success, 1 on failure,
#          2 (reported as SKIP by the test runner) if setup fails or vti6
#          devices can't be created
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# ${ns_a} is intentionally unquoted: it's used as a command prefix
	if ! run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10; then
		err "  vti6 not supported"
		return 2
	fi
	run_cmd ${ns_a} ip link del vti6_a

	fail=0

	min=68			# vti6 can carry IPv4 packets too
	max=$((65535 - 40))	# 40: IPv6 header length
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# This can fail, or MTU can be adjusted to a proper value
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 || continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		# An MTU that can't be read back counts as invalid: with an
		# unquoted, empty value, test(1) would fail with a syntax error
		# and the check would be silently skipped
		if [ -z "${mtu}" ] || [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			# Report the MTU the device actually got, not the one
			# that was requested
			err "  vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti6_a
	done

	# Now check valid values
	for v in ${min} 1280 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		run_cmd ${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
998
# Check how the MTU of a vti6 device follows changes of its endpoints.
#
# Two dummy devices with MTU 1500 and 3000 are created, then:
# - a vti6 device is created with addresses from dummy0's prefix and an
#   explicit MTU of 1300, which must be applied
# - its endpoints are moved to dummy1's prefix without passing an MTU: the MTU
#   must become 3000 minus the IPv6 header length (40)
# - its endpoints are moved back with an explicit MTU of 1280, which must be
#   applied
#
# Returns: 0 on success, 1 on failure,
#          2 (reported as SKIP by the test runner) if setup fails, or if
#          dummy or vti6 devices can't be created
test_pmtu_vti6_link_change_mtu() {
	setup namespaces || return 2

	# ${ns_a} is intentionally unquoted: it's used as a command prefix
	if ! run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy; then
		err "  dummy not supported"
		return 2
	fi
	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
	run_cmd ${ns_a} ip link set dummy0 up
	run_cmd ${ns_a} ip link set dummy1 up

	run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0
	run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1

	fail=0

	# Create vti6 interface bound to device, passing MTU, check it. If the
	# device can't be created at all, skip instead of checking MTU values
	# of a device that doesn't exist
	if ! run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1; then
		err "  vti6 not supported"
		return 2
	fi
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	# Quoted string comparisons below: an empty MTU (read failure) must be
	# reported as a mismatch, while an unquoted numeric test would fail
	# with a syntax error and silently skip the check
	if [ "${mtu}" != "1300" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	fi

	# Move to another device with different MTU, without passing MTU, check
	# MTU is adjusted
	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "$((3000 - 40))" ]; then
		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	fi

	# Move it back, passing MTU, check MTU is not overridden
	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1280" ]; then
		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	fi

	return ${fail}
}
1040
# Check that a command needed by a test is available.
#
# $1: name of the command
#
# Returns: 0 if the command is found, 1 (after logging an error via err)
#          otherwise
check_command() {
	cmd=${1}

	# Use the POSIX "command -v" lookup, as done for test names in the main
	# part of this script, rather than depending on which(1), an external
	# utility that isn't necessarily installed
	if ! command -v "${cmd}" > /dev/null 2>&1; then
		err "  missing required command: '${cmd}'"
		return 1
	fi
	return 0
}
1050
# Check that a veth device can be deleted in a timely manner after route
# exceptions were created over a VXLAN tunnel routed through it.
#
# $1: suffix appended to "vxlan" to select the tunnel setup step (the callers
#     in this file pass 4 or 6)
#
# Returns: 0 on success, 1 if the device deletion is still running after one
#          second, 2 (reported as SKIP by the test runner) if taskset is
#          missing or setup fails
test_cleanup_vxlanX_exception() {
	outer="${1}"
	encap="vxlan"
	ll_mtu=4000

	check_command taskset || return 2
	# IDs of up to two processors, as listed in /proc/cpuinfo
	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)

	setup namespaces routing ${encap}${outer} || return 2
	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	# Make the link between R1 and B the bottleneck: it gets the base link
	# layer MTU, while the link between A and R1 and both tunnel endpoints
	# get 1000 bytes more, so that big packets trigger a route exception
	mtu "${ns_a}"  veth_A-R1 $((ll_mtu + 1000))
	mtu "${ns_r1}" veth_R1-A $((ll_mtu + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((ll_mtu + 1000))
	mtu "${ns_b}" ${encap}_b $((ll_mtu + 1000))

	# Send oversized pings pinned to each listed CPU, to fill the exception
	# cache for multiple CPUs (2). Inner IPv4 can always be used for that
	for cpu in ${cpu_list}; do
		run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ll_mtu + 500)) ${tunnel4_b_addr}
	done

	# Delete the veth device in the background and give it one second
	${ns_a} ip link del dev veth_A-R1 &
	iplink_pid=$!
	sleep 1

	# If the command line of the background process (NUL separators
	# stripped) is no longer the one of "ip link del", deletion completed
	[ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ] || return 0

	err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
	return 1
}
1087
# Run the veth deletion check of test_cleanup_vxlanX_exception with "6" as
# VXLAN setup suffix; the return value is passed through unchanged.
test_cleanup_ipv6_exception() {
	test_cleanup_vxlanX_exception "6"
}
1091
# Run the veth deletion check of test_cleanup_vxlanX_exception with "4" as
# VXLAN setup suffix; the return value is passed through unchanged.
test_cleanup_ipv4_exception() {
	test_cleanup_vxlanX_exception "4"
}
1095
# Print a usage message, including the list of available tests from ${tests},
# and exit with status 1.
#
# The options listed here are the ones accepted by the getopts loop in the
# main part of this script.
usage() {
	echo
	echo "$0 [OPTIONS] [TEST]..."
	echo "If no TEST argument is given, all tests will be run."
	echo
	echo "Options"
	echo "  -p: pause on failure"
	echo "  -t: capture traffic to TEST_INTERFACE.pcap (requires tcpdump)"
	echo "  -v: verbose output"
	echo
	echo "Available tests${tests}"
	exit 1
}
1107
################################################################################
#
# Main: parse options, check that all requested tests exist, then run the
# selected tests one by one, each in its own subshell. The exit code is 1 if
# any test subshell exited with a non-zero status, 0 otherwise.
exitcode=0
desc=0

while getopts :ptv o
do
	case $o in
	p) PAUSE_ON_FAIL=yes;;
	v) VERBOSE=1;;
	t) # Look tcpdump up with the POSIX "command -v", as done for test
	   # names below, instead of depending on which(1) being installed
	   if command -v tcpdump > /dev/null 2>&1; then
		TRACING=1
	   else
		echo "=== tcpdump not available, tracing disabled"
	   fi
	   ;;
	*) usage;;
	esac
done
shift $((OPTIND - 1))

# Split on newlines only, so that ${tests} is iterated line by line below
IFS="
"

for arg do
	# Check first that all requested tests are available before running any
	command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
done

trap cleanup EXIT

# Items of ${tests} are consumed in pairs: the first item of each pair is
# stored as test name, the second one is used as description in the report
for t in ${tests}; do
	if [ "${desc}" -eq 0 ]; then
		name="${t}"
		desc=1
		continue
	fi
	desc=0

	# Without test arguments, run everything. Otherwise, run this test only
	# if one of the arguments matches its name
	run_this=1
	for arg do
		[ "${arg}" != "${arg#--*}" ] && continue
		[ "${arg}" = "${name}" ] && run_this=1 && break
		run_this=0
	done
	[ $run_this -eq 0 ] && continue

	(
		# Restore default field splitting for the test itself
		unset IFS

		if [ "$VERBOSE" = "1" ]; then
			printf "\n##########################################################################\n\n"
		fi

		"test_${name}"
		ret=$?
		cleanup

		if [ $ret -eq 0 ]; then
			printf "TEST: %-60s  [ OK ]\n" "${t}"
		elif [ $ret -eq 1 ]; then
			printf "TEST: %-60s  [FAIL]\n" "${t}"
			if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
				echo
				echo "Pausing. Hit enter to continue"
				read -r a
			fi
			err_flush
			exit 1
		elif [ $ret -eq 2 ]; then
			printf "TEST: %-60s  [SKIP]\n" "${t}"
			err_flush
		else
			# Tests are expected to return 0, 1 or 2 only. Report
			# anything else as a failure, instead of printing no
			# result at all and not counting it
			printf "TEST: %-60s  [FAIL]\n" "${t}"
			err_flush
			exit 1
		fi
	)
	[ $? -ne 0 ] && exitcode=1
done

exit ${exitcode}
1181