#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Check that route PMTU values match expectations, and that initial device MTU
# values are assigned correctly
#
# Tests currently implemented:
#
# - pmtu_ipv4
#	Set up two namespaces, A and B, with two paths between them over routers
#	R1 and R2 (also implemented with namespaces), with different MTUs:
#
#	  segment a_r1    segment b_r1		a_r1: 2000
#	.--------------R1--------------.	b_r1: 1400
#	A                               B	a_r2: 2000
#	'--------------R2--------------'	b_r2: 1500
#	  segment a_r2    segment b_r2
#
#	Check that PMTU exceptions with the correct PMTU are created. Then
#	decrease and increase the MTU of the local link for one of the paths,
#	A to R1, checking that route exception PMTU changes accordingly over
#	this path. Also check that locked exceptions are created when an ICMP
#	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
#	received
#
# - pmtu_ipv6
#	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
#
# - pmtu_ipv4_vxlan4_exception
#	Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
#	over IPv4 between A and B, routed via R1. On the link between R1 and B,
#	set an MTU lower than the VXLAN MTU and the MTU on the link between A
#	and R1. Send IPv4 packets, exceeding the MTU between R1 and B, over
#	VXLAN from A to B and check that the PMTU exception is created with the
#	right value on A
#
# - pmtu_ipv6_vxlan4_exception
#	Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
#
# - pmtu_ipv4_vxlan6_exception
#	Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
#
# - pmtu_ipv6_vxlan6_exception
#	Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
#
# - pmtu_ipv4_geneve4_exception
#	Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
#	VXLAN
#
# - pmtu_ipv6_geneve4_exception
#	Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
#	VXLAN
#
# - pmtu_ipv4_geneve6_exception
#	Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
#	VXLAN
#
# - pmtu_ipv6_geneve6_exception
#	Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
#	VXLAN
#
# - pmtu_ipv{4,6}_fou{4,6}_exception
#	Same as pmtu_ipv4_vxlan4_exception, but using a direct IPv4/IPv6
#	encapsulation (FoU) over IPv4/IPv6, instead of VXLAN
#
# - pmtu_ipv{4,6}_gue{4,6}_exception
#	Same as pmtu_ipv4_vxlan4_exception, but using a generic UDP IPv4/IPv6
#	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
#
# - pmtu_vti4_exception
#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
#	namespaces with matching endpoints. Check that route exception is not
#	created if link layer MTU is not exceeded, then exceed it and check that
#	exception is created with the expected PMTU. The approach described
#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
#	changes alone won't affect PMTU
#
# - pmtu_vti6_exception
#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
#	namespaces with matching endpoints. Check that route exception is
#	created by exceeding link layer MTU with ping to other endpoint. Then
#	decrease and increase MTU of tunnel, checking that route exception PMTU
#	changes accordingly
#
# - pmtu_vti4_default_mtu
#	Set up vti4 tunnel on top of veth, in two namespaces with matching
#	endpoints. Check that MTU assigned to vti interface is the MTU of the
#	lower layer (veth) minus additional lower layer headers (zero, for veth)
#	minus IPv4 header length
#
# - pmtu_vti6_default_mtu
#	Same as above, for IPv6
#
# - pmtu_vti4_link_add_mtu
#	Set up vti4 interface passing MTU value at link creation, check MTU is
#	configured, and that link is not created with invalid MTU values
#
# - pmtu_vti6_link_add_mtu
#	Same as above, for IPv6
#
# - pmtu_vti6_link_change_mtu
#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
#	and check that configured MTU is used on link creation and changes, and
#	that MTU is properly calculated instead when MTU is not configured from
#	userspace
#
# - cleanup_ipv4_exception
#	Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
#	exceptions on multiple CPUs and check that the veth device tear-down
#	happens in a timely manner
#
# - cleanup_ipv6_exception
#	Same as above, but use IPv6 transport from A to B
#
# - list_flush_ipv4_exception
#	Using the same topology as in pmtu_ipv4, create exceptions, and check
#	they are shown when listing exception caches, gone after flushing them
#
# - list_flush_ipv6_exception
#	Using the same topology as in pmtu_ipv6, create exceptions, and check
#	they are shown when listing exception caches, gone after flushing them


# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

PAUSE_ON_FAIL=no
VERBOSE=0
TRACING=0

# Some systems don't have a ping6 binary anymore: fall back to plain ping.
# 'command -v' is used for the lookup as it is specified by POSIX, unlike
# 'which', and an explicit if/else avoids the "A && B || C" pitfall where C
# also runs when B fails.
if command -v ping6 > /dev/null 2>&1; then
	ping6=$(command -v ping6)
else
	ping6=$(command -v ping)
fi

# Table of tests: one test per line, three fields per test. Descriptions
# contain spaces, so fields are separated by tabs.
# NOTE(review): the code consuming this table is expected to split it on
# tabs and newlines only -- confirm against the test runner loop.
#
#	Name				Description				re-run with nh
tests="
	pmtu_ipv4_exception		ipv4: PMTU exceptions			1
	pmtu_ipv6_exception		ipv6: PMTU exceptions			1
	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions	1
	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions	1
	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions	1
	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions	1
	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions	1
	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions	1
	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions	1
	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions	1
	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions		1
	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions		1
	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions		1
	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions		1
	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions		1
	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions		1
	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions		1
	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions		1
	pmtu_vti6_exception		vti6: PMTU exceptions			0
	pmtu_vti4_exception		vti4: PMTU exceptions			0
	pmtu_vti4_default_mtu		vti4: default MTU assignment		0
	pmtu_vti6_default_mtu		vti6: default MTU assignment		0
	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation	0
	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation	0
	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes	0
	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions	1
	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1
	list_flush_ipv4_exception	ipv4: list and flush cached exceptions	1
	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1"

# Namespace names, and command prefixes to run a command inside each of them
NS_A="ns-A"
NS_B="ns-B"
NS_R1="ns-R1"
NS_R2="ns-R2"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
ns_r1="ip netns exec ${NS_R1}"
ns_r2="ip netns exec ${NS_R2}"

# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
# Addresses are:
# - IPv4: PREFIX4.SEGMENT.ID (/24)
# - IPv6: PREFIX6:SEGMENT::ID (/64)
prefix4="10.0"
prefix6="fc00"
a_r1=1
a_r2=2
b_r1=3
b_r2=4
#	ns	peer	segment
routing_addrs="
	A	R1	${a_r1}
	A	R2	${a_r2}
	B	R1	${b_r1}
	B	R2	${b_r2}
"
# Traffic from A to B goes through R1 by default, and through R2, if destined to
# B's address on the b_r2 segment.
# Traffic from B to A goes through R1.
#	ns	destination		gateway
routes="
	A	default			${prefix4}.${a_r1}.2
	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
	B	default			${prefix4}.${b_r1}.2

	A	default			${prefix6}:${a_r1}::2
	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
	B	default			${prefix6}:${b_r1}::2
"

USE_NH="no"
#	ns	family	nh id	destination		gateway
nexthops="
	A	4	41	${prefix4}.${a_r1}.2	veth_A-R1
	A	4	42	${prefix4}.${a_r2}.2	veth_A-R2
	B	4	41	${prefix4}.${b_r1}.2	veth_B-R1

	A	6	61	${prefix6}:${a_r1}::2	veth_A-R1
	A	6	62	${prefix6}:${a_r2}::2	veth_A-R2
	B	6	61	${prefix6}:${b_r1}::2	veth_B-R1
"

# nexthop id correlates to id in nexthops config above
#	ns	family	prefix			nh id
routes_nh="
	A	4	default			41
	A	4	${prefix4}.${b_r2}.1	42
	B	4	default			41

	A	6	default			61
	A	6	${prefix6}:${b_r2}::1	62
	B	6	default			61
"

# Addresses used on the direct veth link between A and B
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
veth6_mask="64"

# Addresses assigned to tunnel endpoints in A and B
tunnel4_a_addr="192.168.2.1"
tunnel4_b_addr="192.168.2.2"
tunnel4_mask="24"
tunnel6_a_addr="fd00:2::a"
tunnel6_b_addr="fd00:2::b"
tunnel6_mask="64"

dummy6_0_prefix="fc00:1000::"
dummy6_1_prefix="fc00:1001::"
dummy6_mask="64"

# Buffer of pending error messages, and PIDs of running tcpdump instances
err_buf=
tcpdump_pids=

# Append message $1, followed by a newline, to the error buffer
err() {
	err_buf="${err_buf}${1}
"
}

# Print the buffered error messages verbatim and empty the buffer
err_flush() {
	# printf '%s' instead of 'echo -n': the latter is not portable across
	# /bin/sh implementations and may interpret backslashes in messages
	printf '%s' "${err_buf}"
	err_buf=
}

# Run the command given as arguments, capturing stdout and stderr in the global
# 'out'. With VERBOSE=1, print the command and any output it produced.
# Returns the exit status of the command.
run_cmd() {
	cmd="$*"

	if [ "$VERBOSE" = "1" ]; then
		# The command is data, not a format: a '%' in it must not be
		# interpreted by printf
		printf ' COMMAND: %s\n' "$cmd"
	fi

	# Intentionally unquoted: the command line is split again into words
	out="$($cmd 2>&1)"
	rc=$?
	if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
		printf ' %s\n\n' "$out"
	fi

	return $rc
}

# Find the auto-generated name for this namespace: print the value of the
# variable NS_<$1> (eval is needed as POSIX sh has no indirect expansion)
nsname() {
	eval echo \$NS_$1
}

# Set up a FoU or GUE tunnel between A and B, on the path via R1
#	$1: outer IP version (4 or 6)
#	$2: inner IP version (4 or 6)
#	$3: encapsulation, "fou" or "gue", also used as interface name prefix
# Returns 2 if the needed module or the configuration in A is not available
setup_fou_or_gue() {
	outer="${1}"
	inner="${2}"
	encap="${3}"

	# 'mode' is only set for IPv6 outer tunnels below: clear it, so that a
	# value left over from a previous call can't leak into the link
	# creation commands of an IPv4 outer tunnel
	mode=""

	if [ "${outer}" = "4" ]; then
		modprobe fou || return 2
		a_addr="${prefix4}.${a_r1}.1"
		b_addr="${prefix4}.${b_r1}.1"
		if [ "${inner}" = "4" ]; then
			type="ipip"
			ipproto="4"
		else
			type="sit"
			ipproto="41"
		fi
	else
		modprobe fou6 || return 2
		a_addr="${prefix6}:${a_r1}::1"
		b_addr="${prefix6}:${b_r1}::1"
		if [ "${inner}" = "4" ]; then
			type="ip6tnl"
			mode="mode ipip6"
			ipproto="4 -6"
		else
			type="ip6tnl"
			mode="mode ip6ip6"
			ipproto="41 -6"
		fi
	fi

	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2

	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555

	if [ "${inner}" = "4" ]; then
		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
	else
		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
	fi

	run_cmd ${ns_a} ip link set ${encap}_a up
	run_cmd ${ns_b} ip link set ${encap}_b up
}

# Wrappers named setup_<encap><outer><inner>, as looked up by setup()
setup_fou44() {
	setup_fou_or_gue 4 4 fou
}

setup_fou46() {
	setup_fou_or_gue 4 6 fou
}

setup_fou64() {
	setup_fou_or_gue 6 4 fou
}

setup_fou66() {
	setup_fou_or_gue 6 6 fou
}

setup_gue44() {
	setup_fou_or_gue 4 4 gue
}

setup_gue46() {
	setup_fou_or_gue 4 6 gue
}

setup_gue64() {
	setup_fou_or_gue 6 4 gue
}

setup_gue66() {
	setup_fou_or_gue 6 6 gue
}

# Create the four namespaces A, B, R1, R2. Returns 1 if one can't be created
setup_namespaces() {
	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		ip netns add ${n} || return 1

		# Disable DAD, so that we don't have to wait to use the
		# configured IPv6 addresses
		ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
	done
}

# Create a veth pair directly connecting A (veth_a) and B (veth_b), assign
# IPv4 and IPv6 addresses and bring both ends up. Returns 1 if the pair can't
# be created
setup_veth() {
	run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
	run_cmd ${ns_a} ip link set veth_b netns ${NS_B}

	run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
	run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b

	run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
	run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b

	run_cmd ${ns_a} ip link set veth_a up
	run_cmd ${ns_b} ip link set veth_b up
}

# Create vti<proto>_a in A and vti<proto>_b in B, with matching endpoints
#	$1: IP version (4 or 6)
#	$2, $3: local and remote underlying addresses, as seen from A
#	$4, $5: tunnel addresses for A and B
#	$6: prefix length for tunnel addresses
# Returns 1 if the interface can't be created in A
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
	run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10

	run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
	run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b

	run_cmd ${ns_a} ip link set vti${proto}_a up
	run_cmd ${ns_b} ip link set vti${proto}_b up
}

setup_vti4() {
	setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
}

setup_vti6() {
	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
}

# Create <type>_a in A and <type>_b in B, assign IPv4 and IPv6 tunnel addresses
# and bring both ends up
#	$1: tunnel type, "vxlan" or "geneve"
#	$2, $3: underlying addresses of A and B
#	$4: additional options for link creation (optional)
# Returns 1 if the interface can't be created in A
setup_vxlan_or_geneve() {
	type="${1}"
	a_addr="${2}"
	b_addr="${3}"
	opts="${4}"

	if [ "${type}" = "vxlan" ]; then
		opts="${opts} ttl 64 dstport 4789"
		opts_a="local ${a_addr}"
		opts_b="local ${b_addr}"
	else
		opts_a=""
		opts_b=""
	fi

	run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}

	run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b

	run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b

	run_cmd ${ns_a} ip link set ${type}_a up
	run_cmd ${ns_b} ip link set ${type}_b up
}

setup_geneve4() {
	setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set"
}

setup_vxlan4() {
	setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set"
}

setup_geneve6() {
	setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}

setup_vxlan6() {
	setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}

# Add matching xfrm states and policies, for both directions, in A and B
#	$1: IP version (4 or 6)
#	$2, $3: addresses of A and B
# Returns 1 if the first state can't be added in A
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
	run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel

	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
	run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}

setup_xfrm4() {
	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
}

setup_xfrm6() {
	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}

# Add the routes listed in ${routes}: words are consumed three at a time, as
# namespace, destination and gateway
setup_routing_old() {
	# The loop relies on these being empty to tell which field comes next,
	# and other helpers use the same global names: start from a clean state
	ns=""; addr=""; gw=""

	for i in ${routes}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
		[ "${gw}" = "" ]	&& gw="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} route add ${addr} via ${gw}

		ns=""; addr=""; gw=""
	done
}

# Add the nexthop objects listed in ${nexthops} (namespace, family, nexthop id,
# gateway, device), then the routes listed in ${routes_nh} (namespace, family,
# destination, nexthop id) referring to them
setup_routing_new() {
	# See setup_routing_old(): don't inherit stale field values, 'dev' in
	# particular is also set by mtu()
	ns=""; fam=""; nhid=""; gw=""; dev=""

	for i in ${nexthops}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
		[ "${nhid}" = "" ]	&& nhid="${i}"		&& continue
		[ "${gw}" = "" ]	&& gw="${i}"		&& continue
		[ "${dev}" = "" ]	&& dev="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}

		ns=""; fam=""; nhid=""; gw=""; dev=""

	done

	ns=""; fam=""; addr=""; nhid=""

	for i in ${routes_nh}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
		[ "${nhid}" = "" ]	&& nhid="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}

		ns=""; fam=""; addr=""; nhid=""
	done
}

# Enable forwarding on routers, create and address the veth links described by
# ${routing_addrs} (namespace, peer, segment), then add routes, using nexthop
# objects if USE_NH is "yes". Returns 1 if a veth link can't be created
setup_routing() {
	for i in ${NS_R1} ${NS_R2}; do
		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
	done

	# See setup_routing_old(): don't inherit stale field values
	ns=""; peer=""; segment=""

	for i in ${routing_addrs}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
		[ "${segment}" = "" ]	&& segment="${i}"

		ns_name="$(nsname ${ns})"
		peer_name="$(nsname ${peer})"
		if="veth_${ns}-${peer}"
		ifpeer="veth_${peer}-${ns}"

		# Create veth links
		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
		ip -n ${peer_name} link set dev ${ifpeer} up

		# Add addresses
		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}

		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}

		ns=""; peer=""; segment=""
	done

	if [ "$USE_NH" = "yes" ]; then
		setup_routing_new
	else
		setup_routing_old
	fi

	return 0
}

# Clean up, then call setup_<arg> for each argument, in order.
# Returns the kselftest skip code if not running as root, 1 if a setup step
# fails
setup() {
	[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip

	cleanup
	for arg do
		eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
	done
}

# If TRACING is enabled, start tcpdump in background for each pair of
# arguments (namespace command prefix, interface name), writing to
# <name>_<interface>.pcap, where 'name' is a global set elsewhere
trace() {
	[ $TRACING -eq 0 ] && return

	# 'ns_cmd' tells whether the next argument is a prefix or an interface,
	# and is also set by other helpers: start from a clean state
	ns_cmd=
	for arg do
		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
		tcpdump_pids="${tcpdump_pids} $!"
		ns_cmd=
	done
	sleep 1
}

# Stop running tcpdump instances and delete all the namespaces
cleanup() {
	for pid in ${tcpdump_pids}; do
		kill ${pid}
	done
	tcpdump_pids=

	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		ip netns del ${n} 2> /dev/null
	done
}

# Set MTU $3 on device $2, using command prefix $1
mtu() {
	ns_cmd="${1}"
	dev="${2}"
	mtu="${3}"

	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
}

# Print the word following "mtu" in $1, or "lock <value>" if "mtu" is followed
# by "lock". Print nothing if "mtu" isn't found
mtu_parse() {
	input="${1}"

	next=0
	for i in ${input}; do
		[ ${next} -eq 1 ] && [ "${i}" = "lock" ] && next=2 && continue
		[ ${next} -eq 1 ] && echo "${i}" && return
		[ ${next} -eq 2 ] && echo "lock ${i}" && return
		[ "${i}" = "mtu" ] && next=1
	done
}

# Show link $2, using command prefix $1
link_get() {
	ns_cmd="${1}"
	name="${2}"

	${ns_cmd} ip link show dev "${name}"
}

# Print MTU of link $2, using command prefix $1
link_get_mtu() {
	ns_cmd="${1}"
	name="${2}"

	mtu_parse "$(link_get "${ns_cmd}" ${name})"
}

# Show route to destination $2, using command prefix $1
route_get_dst_exception() {
	ns_cmd="${1}"
	dst="${2}"

	${ns_cmd} ip route get "${dst}"
}

# Print the MTU reported for the route to destination $2, if any, using
# command prefix $1
route_get_dst_pmtu_from_exception() {
	ns_cmd="${1}"
	dst="${2}"

	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
}

# Compare PMTU value $2 against expected value $1 after event $3.
# An expected value of "any" matches any non-empty value, an empty expected
# value means that no exception should exist. On mismatch, queue an error
# message and return 1
check_pmtu_value() {
	expected="${1}"
	value="${2}"
	event="${3}"

	[ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
	[ "${value}" = "${expected}" ] && return 0
	[ -z "${value}" ] &&    err " PMTU exception wasn't created after ${event}" && return 1
	[ -z "${expected}" ] && err " PMTU exception shouldn't exist after ${event}" && return 1
	err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
	return 1
}

# PMTU exceptions over the routed topology, for IP version $1 (4 or 6)
test_pmtu_ipvX() {
	family=${1}

	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst1="${prefix4}.${b_r1}.1"
		dst2="${prefix4}.${b_r2}.1"
	else
		ping=${ping6}
		dst1="${prefix6}:${b_r1}::1"
		dst2="${prefix6}:${b_r2}::1"
	fi

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}"  veth_B-R1 1400

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	# Create route exceptions
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}

	# Check that exceptions have been created with the correct PMTU
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

	# Decrease local MTU below PMTU, check for PMTU decrease in route exception
	mtu "${ns_a}"  veth_A-R1 1300
	mtu "${ns_r1}" veth_R1-A 1300
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Increase MTU, check for PMTU increase in route exception
	mtu "${ns_a}"  veth_A-R1 1700
	mtu "${ns_r1}" veth_R1-A 1700
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Skip PMTU locking tests for IPv6
	[ $family -eq 6 ] && return 0

	# Decrease remote MTU on path via R2, get new exception
	mtu "${ns_r2}" veth_R2-B 400
	mtu "${ns_b}"  veth_B-R2 400
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1

	# Decrease local MTU below PMTU
	mtu "${ns_a}"  veth_A-R2 500
	mtu "${ns_r2}" veth_R2-A 500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1

	# Increase local MTU
	mtu "${ns_a}"  veth_A-R2 1500
	mtu "${ns_r2}" veth_R2-A 1500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1

	# Get new exception
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}

test_pmtu_ipv4_exception() {
	test_pmtu_ipvX 4
}

test_pmtu_ipv6_exception() {
	test_pmtu_ipvX 6
}

# PMTU exception for traffic over a VXLAN or GENEVE tunnel routed via R1
#	$1: tunnel type, "vxlan" or "geneve"
#	$2: IP version of tunneled traffic (4 or 6)
#	$3: IP version of the transport (4 or 6)
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
	type=${1}
	family=${2}
	outer_family=${3}
	ll_mtu=4000

	if [ ${outer_family} -eq 4 ]; then
		setup namespaces routing ${type}4 || return 2
		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
	else
		setup namespaces routing ${type}6 || return 2
		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
	fi

	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
}

test_pmtu_ipv4_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  4 4
}

test_pmtu_ipv6_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  6 4
}

test_pmtu_ipv4_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4
}

test_pmtu_ipv6_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4
}

test_pmtu_ipv4_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  4 6
}

test_pmtu_ipv6_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  6 6
}

test_pmtu_ipv4_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6
}

test_pmtu_ipv6_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
}

# PMTU exception for traffic over a FoU or GUE tunnel routed via R1
#	$1: IP version of tunneled traffic (4 or 6)
#	$2: IP version of the transport (4 or 6)
#	$3: encapsulation, "fou" or "gue"
test_pmtu_ipvX_over_fouY_or_gueY() {
	inner_family=${1}
	outer_family=${2}
	encap=${3}
	ll_mtu=4000

	setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	if [ ${inner_family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	if [ "${encap}" = "gue" ]; then
		encap_overhead=4
	else
		encap_overhead=0
	fi

	if [ ${outer_family} -eq 4 ]; then
		#                      IPv4 header   UDP header
		exp_mtu=$((${ll_mtu} - 20          - 8         - ${encap_overhead}))
	else
		#                      IPv6 header   Option 4   UDP header
		exp_mtu=$((${ll_mtu} - 40          - 8        - 8       - ${encap_overhead}))
	fi

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
}

test_pmtu_ipv4_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou
}

test_pmtu_ipv6_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou
}

test_pmtu_ipv4_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou
}

test_pmtu_ipv6_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou
}

test_pmtu_ipv4_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue
}

test_pmtu_ipv6_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue
}

test_pmtu_ipv4_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue
}

test_pmtu_ipv6_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
}

# Test functions below return 0 on success, 1 on failure and 2 when the test
# has to be skipped (see run_test() for how these values are reported).

# Check IPv4 PMTU exceptions over a vti4 tunnel with xfrm: no exception must
# appear while packets fit the link layer MTU, and one with the expected PMTU
# must appear as soon as the link layer MTU is exceeded by one byte.
test_pmtu_vti4_exception() {
	setup namespaces veth vti4 xfrm4 || return 2
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti4_a    "${ns_b}" vti4_b

	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	#                                SPI   SN   IV  ICV   pad length   next header
	esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
	# 28: presumably IPv4 header (20) plus ICMP echo header (8) -- TODO confirm
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# Send DF packet without exceeding link layer MTU, check that no
	# exception is created
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1

	# Now exceed link layer MTU by one byte, check that exception is created
	# with the right PMTU value
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}

# Check IPv6 PMTU exceptions over a vti6 tunnel with xfrm: create an exception
# by exceeding the link layer MTU, then verify that the PMTU stored in it
# follows decreases and increases of the tunnel device MTU.
test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
	fail=0

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1

	# Decrease tunnel MTU, check for PMTU decrease in route exception
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1

	# Increase tunnel MTU, check for PMTU increase in route exception
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1

	return ${fail}
}

# Check the default MTU assigned to a vti4 device created on top of veth.
test_pmtu_vti4_default_mtu() {
	setup namespaces veth vti4 || return 2

	# Check that MTU of vti device is MTU of veth minus IPv4 header length
	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
	if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
		err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
		return 1
	fi
}

# Check the default MTU assigned to a vti6 device created on top of veth.
test_pmtu_vti6_default_mtu() {
	setup namespaces veth vti6 || return 2

	# Check that MTU of vti device is MTU of veth minus IPv6 header length
	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
		err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
		return 1
	fi
}

# Check MTU handling when a vti4 device is created with an explicit MTU:
# out-of-range values must be rejected or adjusted into range, in-range values
# must be applied as given.
test_pmtu_vti4_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti support first, skip the test if the link can't be added
	run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
	[ $? -ne 0 ] && err " vti not supported" && return 2
	run_cmd ${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65535 - 20))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		# This can fail, or MTU can be adjusted to a proper value
		[ $? -ne 0 ] && continue
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			err " vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti4_a
	done

	# Now check valid values
	for v in ${min} 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		run_cmd ${ns_a} ip link del vti4_a
		if [ "${mtu}" != "${v}" ]; then
			err " vti MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}

# Same as test_pmtu_vti4_link_add_mtu(), for vti6 devices.
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti6 support first, skip the test if the link can't be added
	run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
	[ $? -ne 0 ] && err " vti6 not supported" && return 2
	run_cmd ${ns_a} ip link del vti6_a

	fail=0

	min=68				# vti6 can carry IPv4 packets too
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		# This can fail, or MTU can be adjusted to a proper value
		[ $? -ne 0 ] && continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			# Report the MTU the device actually got, not the requested one
			err " vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		run_cmd ${ns_a} ip link del vti6_a
	done

	# Now check valid values
	for v in ${min} 1280 1300 ${max}; do
		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		run_cmd ${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err " vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}

# Check MTU handling when the endpoints of an existing vti6 device are changed
# so that it moves between two dummy devices with different MTUs (1500, 3000).
test_pmtu_vti6_link_change_mtu() {
	setup namespaces || return 2

	run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
	[ $? -ne 0 ] && err " dummy not supported" && return 2
	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
	run_cmd ${ns_a} ip link set dummy0 up
	run_cmd ${ns_a} ip link set dummy1 up

	run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0
	run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1

	fail=0

	# Create vti6 interface bound to device, passing MTU, check it
	run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" -ne 1300 ]; then
		err " vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	fi

	# Move to another device with different MTU, without passing MTU, check
	# MTU is adjusted
	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" -ne $((3000 - 40)) ]; then
		err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	fi

	# Move it back, passing MTU, check MTU is not overridden
	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" -ne 1280 ]; then
		err " vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	fi

	return ${fail}
}

# Check that command $1 is available.
# Returns 0 if it is, otherwise logs an error with err() and returns 1.
check_command() {
	cmd=${1}

	if ! command -v "${cmd}" > /dev/null 2>&1; then
		err " missing required command: '${cmd}'"
		return 1
	fi
	return 0
}

# Check that a veth device can still be deleted in a timely manner after PMTU
# exceptions were created over a VXLAN tunnel from up to two CPUs.
# $1: IP version (4 or 6) of the outer VXLAN transport
test_cleanup_vxlanX_exception() {
	outer="${1}"
	encap="vxlan"
	ll_mtu=4000

	check_command taskset || return 2
	# IDs of the first two CPUs listed in /proc/cpuinfo (at most)
	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)

	setup namespaces routing ${encap}${outer} || return 2
	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))

	# Fill exception cache for multiple CPUs (2)
	# we can always use inner IPv4 for that
	for cpu in ${cpu_list}; do
		run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr}
	done

	# Delete the device in the background: if the same command line is
	# still found under that PID one second later, deletion is stuck
	${ns_a} ip link del dev veth_A-R1 &
	iplink_pid=$!
	sleep 1
	if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
		err " can't delete veth device in a timely manner, PMTU dst likely leaked"
		return 1
	fi
}

# Device cleanup check with VXLAN over IPv6 transport.
test_cleanup_ipv6_exception() {
	test_cleanup_vxlanX_exception 6
}

# Device cleanup check with VXLAN over IPv4 transport.
test_cleanup_ipv4_exception() {
	test_cleanup_vxlanX_exception 4
}

# Run a single test in a subshell and print its outcome.
# $1: test name, the function test_$1 is invoked
# $2: test description, printed next to the result
# Returns the test result (0: OK, 1: FAIL, 2: SKIP). Any non-zero result also
# sets the global exitcode to 1.
run_test() {
	(
	tname="$1"
	tdesc="$2"

	# The main loop sets IFS to a newline only, restore default splitting
	# for the test itself
	unset IFS

	if [ "$VERBOSE" = "1" ]; then
		printf "\n##########################################################################\n\n"
	fi

	eval test_${tname}
	ret=$?

	if [ $ret -eq 0 ]; then
		printf "TEST: %-60s [ OK ]\n" "${tdesc}"
	elif [ $ret -eq 1 ]; then
		printf "TEST: %-60s [FAIL]\n" "${tdesc}"
		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
			echo
			echo "Pausing. Hit enter to continue"
			read a
		fi
		err_flush
		exit 1
	elif [ $ret -eq 2 ]; then
		printf "TEST: %-60s [SKIP]\n" "${tdesc}"
		err_flush
	fi

	return $ret
	)
	ret=$?
	[ $ret -ne 0 ] && exitcode=1

	return $ret
}

# Run a single test with USE_NH=yes, appending " - nexthop objects" to its
# description. USE_NH is set back to "no" afterwards.
# $1: test name
# $2: test description
run_test_nh() {
	tname="$1"
	tdesc="$2"

	USE_NH=yes
	run_test "${tname}" "${tdesc} - nexthop objects"
	USE_NH=no
}

# Create 101 cached IPv4 route exceptions (100 via R1, one via R2), then check
# that they can be listed with 'ip route list cache' and removed with
# 'ip route flush cache'.
test_list_flush_ipv4_exception() {
	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	dst_prefix1="${prefix4}.${b_r1}."
	dst2="${prefix4}.${b_r2}.1"

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1500
	mtu "${ns_b}"  veth_B-R1 1500

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	fail=0

	# Add 100 addresses for veth endpoint on B reached by default A route
	for i in $(seq 100 199); do
		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
	done

	# Create 100 cached route exceptions for path via R1, one via R2. Note
	# that with IPv4 we need to actually cause a route lookup that matches
	# the exception caused by ICMP, in order to actually have a cached
	# route, so we need to ping each destination twice
	for i in $(seq 100 199); do
		run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
	done
	run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"

	# Each exception is printed as two lines
	if [ "$(${ns_a} ip route list cache | wc -l)" -ne 202 ]; then
		err " can't list cached exceptions"
		fail=1
	fi

	run_cmd ${ns_a} ip route flush cache
	# Look up destinations that actually had exceptions before the flush:
	# the first address reached via R1, and the one reached via R2
	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}")"
	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
	   [ -n "$(${ns_a} ip route list cache)" ]; then
		err " can't flush cached exceptions"
		fail=1
	fi

	return ${fail}
}

# Create 101 cached IPv6 route exceptions (100 via R1, one via R2), then check
# that they can be listed with 'ip -6 route list cache' and removed with
# 'ip -6 route flush cache'.
test_list_flush_ipv6_exception() {
	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	dst_prefix1="${prefix6}:${b_r1}::"
	dst2="${prefix6}:${b_r2}::1"

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1500
	mtu "${ns_b}"  veth_B-R1 1500

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	fail=0

	# Add 100 addresses for veth endpoint on B reached by default A route
	for i in $(seq 100 199); do
		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
	done

	# Create 100 cached route exceptions for path via R1, one via R2
	for i in $(seq 100 199); do
		run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
	done
	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
	# Here the check expects one line of output per exception
	if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 101 ]; then
		err " can't list cached exceptions"
		fail=1
	fi

	run_cmd ${ns_a} ip -6 route flush cache
	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}")"
	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
	   [ -n "$(${ns_a} ip -6 route list cache)" ]; then
		err " can't flush cached exceptions"
		fail=1
	fi

	return ${fail}
}

# Print command line help, including the list of available tests, and exit
# with status 1.
usage() {
	echo
	echo "$0 [OPTIONS] [TEST]..."
	echo "If no TEST argument is given, all tests will be run."
	echo
	echo "Options"
	echo " -p: pause on failure, hit enter to continue"
	echo " -t: capture traffic to TEST_INTERFACE.pcap (needs tcpdump)"
	echo " -v: verbose mode"
	echo
	echo "Available tests${tests}"
	exit 1
}

################################################################################
#
exitcode=0
desc=0

# Options: -p pause on failure, -t enable tracing, -v verbose
while getopts :ptv o
do
	case $o in
	p) PAUSE_ON_FAIL=yes;;
	v) VERBOSE=1;;
	t) if command -v tcpdump > /dev/null 2>&1; then
		TRACING=1
	   else
		echo "=== tcpdump not available, tracing disabled"
	   fi
	   ;;
	*) usage;;
	esac
done
shift $(($OPTIND-1))

# Split on newlines only from here on, so that each line of ${tests} is taken
# as a single word by the loop below, whatever spaces it contains
IFS="
"

for arg do
	# Check first that all requested tests are available before running any
	command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
done

trap cleanup EXIT

# start clean
cleanup

# Tests can be re-run with nexthop objects only if 'ip nexthop' works here
HAVE_NH=no
ip nexthop ls >/dev/null 2>&1
[ $? -eq 0 ] && HAVE_NH=yes

# ${tests} is consumed three entries at a time: test name, description, and a
# flag telling (if "1") that the test should also be run with nexthop objects
name=""
desc=""
rerun_nh=0
for t in ${tests}; do
	[ "${name}" = "" ]	&& name="${t}"	&& continue
	[ "${desc}" = "" ]	&& desc="${t}"	&& continue

	if [ "${HAVE_NH}" = "yes" ]; then
		rerun_nh="${t}"
	fi

	# Run every test if no test name was given as argument, otherwise
	# only the ones named on the command line
	run_this=1
	for arg do
		[ "${arg}" != "${arg#--*}" ] && continue
		[ "${arg}" = "${name}" ] && run_this=1 && break
		run_this=0
	done
	if [ $run_this -eq 1 ]; then
		run_test "${name}" "${desc}"
		# if test was skipped no need to retry with nexthop objects
		[ $? -eq 2 ] && rerun_nh=0

		if [ "${rerun_nh}" = "1" ]; then
			run_test_nh "${name}" "${desc}"
		fi
	fi
	name=""
	desc=""
	rerun_nh=0
done

exit ${exitcode}