#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# Check that route PMTU values match expectations, and that initial device MTU
# values are assigned correctly
#
# Tests currently implemented:
#
# - pmtu_vti4_exception
#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
#	namespaces with matching endpoints. Check that route exception is not
#	created if link layer MTU is not exceeded, then exceed it and check that
#	exception is created with the expected PMTU. The approach described
#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
#	changes alone won't affect PMTU
#
# - pmtu_vti6_exception
#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
#	namespaces with matching endpoints. Check that route exception is
#	created by exceeding link layer MTU with ping to other endpoint. Then
#	decrease and increase MTU of tunnel, checking that route exception PMTU
#	changes accordingly
#
# - pmtu_vti4_default_mtu
#	Set up vti4 tunnel on top of veth, in two namespaces with matching
#	endpoints. Check that MTU assigned to vti interface is the MTU of the
#	lower layer (veth) minus additional lower layer headers (zero, for veth)
#	minus IPv4 header length
#
# - pmtu_vti6_default_mtu
#	Same as above, for IPv6
#
# - pmtu_vti4_link_add_mtu
#	Set up vti4 interface passing MTU value at link creation, check MTU is
#	configured, and that link is not created with invalid MTU values
#
# - pmtu_vti6_link_add_mtu
#	Same as above, for IPv6
#
# - pmtu_vti6_link_change_mtu
#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
#	and check that configured MTU is used on link creation and changes, and
#	that MTU is properly calculated instead when MTU is not configured from
#	userspace

# One test per line: the first word is the test name (the function
# test_<name> is run), the rest of the line is the description that gets
# printed. Any amount of blanks may separate the two.
tests="
	pmtu_vti6_exception		vti6: PMTU exceptions
	pmtu_vti4_exception		vti4: PMTU exceptions
	pmtu_vti4_default_mtu		vti4: default MTU assignment
	pmtu_vti6_default_mtu		vti6: default MTU assignment
	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"

# Namespace names get a random suffix so they don't clash with existing ones
NS_A="ns-$(mktemp -u XXXXXX)"
NS_B="ns-$(mktemp -u XXXXXX)"
# Command prefixes; they are expanded unquoted on purpose, so that they split
# into "ip netns exec <namespace>"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"

veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
veth6_mask="64"

vti4_a_addr="192.168.2.1"
vti4_b_addr="192.168.2.2"
vti4_mask="24"
vti6_a_addr="fd00:2::a"
vti6_b_addr="fd00:2::b"
vti6_mask="64"

dummy6_0_addr="fc00:1000::0"
dummy6_1_addr="fc00:1001::0"
dummy6_mask="64"

# 1 means there is nothing to clean up; setup() resets it to 0
cleanup_done=1
# Messages queued by err(), printed by err_flush() after the test result line
err_buf=

# Queue message $1, followed by a newline, in err_buf
err() {
	err_buf="${err_buf}${1}
"
}

# Print and empty the queued messages. printf is used instead of "echo -n",
# whose behaviour is not specified for a plain /bin/sh
err_flush() {
	printf '%s' "${err_buf}"
	err_buf=
}

# Create both namespaces. Returns non-zero if either can't be created
setup_namespaces() {
	ip netns add ${NS_A} || return 1
	ip netns add ${NS_B}
}

# Create a veth pair with one end in each namespace, assign IPv4 and IPv6
# addresses, bring both ends up. Returns 1 if the pair can't be created
setup_veth() {
	${ns_a} ip link add veth_a type veth peer name veth_b || return 1
	${ns_a} ip link set veth_b netns ${NS_B}

	${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
	${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b

	${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
	${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b

	${ns_a} ip link set veth_a up
	${ns_b} ip link set veth_b up
}

# Create matching vti tunnel endpoints vti<proto>_a and vti<proto>_b
#   $1: IP version, 4 or 6     $2, $3: underlying (veth) addresses for A, B
#   $4, $5: tunnel addresses for A, B     $6: tunnel prefix length
# Returns 1 if the first tunnel link can't be created
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	${ns_a} ip link add vti${proto}_a type ${vti_type} \
		local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
	${ns_b} ip link add vti${proto}_b type ${vti_type} \
		local ${veth_b_addr} remote ${veth_a_addr} key 10

	${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
	${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b

	${ns_a} ip link set vti${proto}_a up
	${ns_b} ip link set vti${proto}_b up

	# NOTE(review): presumably gives addresses and links time to become
	# usable before the test starts sending traffic -- confirm
	sleep 1
}

setup_vti4() {
	setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
}

setup_vti6() {
	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
}

# Install ESP tunnel-mode xfrm states (one per direction) and in/out policies
# with mark 10 in both namespaces
#   $1: IP version, 4 or 6     $2, $3: tunnel outer addresses for A, B
# Returns 1 if the first state can't be added
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} \
		spi 0x1000 proto esp aead "rfc4106(gcm(aes))" \
		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
	${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} \
		spi 0x1001 proto esp aead "rfc4106(gcm(aes))" \
		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	${ns_a} ip -${proto} xfrm policy add dir out mark 10 \
		tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
	${ns_a} ip -${proto} xfrm policy add dir in mark 10 \
		tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel

	${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} \
		spi 0x1000 proto esp aead "rfc4106(gcm(aes))" \
		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} \
		spi 0x1001 proto esp aead "rfc4106(gcm(aes))" \
		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
	${ns_b} ip -${proto} xfrm policy add dir out mark 10 \
		tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
	${ns_b} ip -${proto} xfrm policy add dir in mark 10 \
		tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}

setup_xfrm4() {
	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
}

setup_xfrm6() {
	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}

# Run setup_<arg> for each argument, in order. Returns 1 if not running as
# root, or as soon as one setup step fails
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo " need to run as root"
		return 1
	fi

	cleanup_done=0
	for arg in "$@"; do
		eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
	done
}

# Delete both namespaces, unless nothing was set up since the last cleanup.
# Errors are discarded: a namespace might not exist if setup failed early
cleanup() {
	[ ${cleanup_done} -eq 1 ] && return
	ip netns del ${NS_A} 2> /dev/null
	ip netns del ${NS_B} 2> /dev/null
	cleanup_done=1
}

# Set MTU $3 on device $2, using namespace command prefix $1
mtu() {
	ns_cmd="${1}"
	dev="${2}"
	mtu="${3}"

	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
}

# Print the word following the first "mtu" word in $1, nothing if there is
# no such word
mtu_parse() {
	input="${1}"

	next=0
	for i in ${input}; do
		if [ ${next} -eq 1 ]; then
			printf '%s\n' "${i}"
			return
		fi
		[ "${i}" = "mtu" ] && next=1
	done
}

# Print "ip link show" output for device $2, using command prefix $1
link_get() {
	ns_cmd="${1}"
	name="${2}"

	${ns_cmd} ip link show dev "${name}"
}

# Print MTU of device $2, using command prefix $1
link_get_mtu() {
	ns_cmd="${1}"
	name="${2}"

	mtu_parse "$(link_get "${ns_cmd}" ${name})"
}

# Print "ip route get" output for destination $2, using command prefix $1
route_get_dst_exception() {
	ns_cmd="${1}"
	dst="${2}"

	${ns_cmd} ip route get "${dst}"
}

# Print the mtu value reported by "ip route get" for destination $2, nothing
# if the output carries no mtu
route_get_dst_pmtu_from_exception() {
	ns_cmd="${1}"
	dst="${2}"

	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
}

# Test functions return 0 on success, 1 on failure, 2 if the test is skipped

test_pmtu_vti4_exception() {
	setup namespaces veth vti4 xfrm4 || return 2

	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	#                                SPI   SN   IV  ICV   pad length   next header
	esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
	# NOTE(review): 28 is presumably the inner IPv4 header (20) plus the
	# ICMP header (8) -- confirm
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# Send DF packet without exceeding link layer MTU, check that no
	# exception is created
	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	if [ "${pmtu}" != "" ]; then
		err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
		return 1
	fi

	# Now exceed link layer MTU by one byte, check that exception is created
	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	if [ "${pmtu}" = "" ]; then
		err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
		return 1
	fi

	# ...with the right PMTU value
	if [ "${pmtu}" != "${esp_payload_rfc4106}" ]; then
		err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
		return 1
	fi

	return 0
}

test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	fail=0

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" = "" ]; then
		err " tunnel exceeding link layer MTU didn't create route exception"
		return 1
	fi

	# Decrease tunnel MTU, check for PMTU decrease in route exception.
	# Values are compared as strings: a numeric test on an empty value is
	# an error, which "if" would take as "no mismatch"
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" != "3000" ]; then
		err " decreasing tunnel MTU didn't decrease route exception PMTU"
		fail=1
	fi

	# Increase tunnel MTU, check for PMTU increase in route exception
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if [ "${pmtu}" != "9000" ]; then
		err " increasing tunnel MTU didn't increase route exception PMTU"
		fail=1
	fi

	return ${fail}
}

test_pmtu_vti4_default_mtu() {
	setup namespaces veth vti4 || return 2

	# Check that MTU of vti device is MTU of veth minus IPv4 header length
	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
	if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
		err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
		return 1
	fi

	return 0
}

test_pmtu_vti6_default_mtu() {
	setup namespaces veth vti6 || return 2

	# Check that MTU of vti device is MTU of veth minus IPv6 header length
	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
		err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
		return 1
	fi

	return 0
}

test_pmtu_vti4_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti support first, so that the test can be skipped
	if ! ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10; then
		err " vti not supported"
		return 2
	fi
	${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65528 - 20))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# This can fail, or MTU can be adjusted to a proper value
		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null || continue
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ -z "${mtu}" ] || [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			err " vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti4_a
	done

	# Now check valid values
	for v in ${min} 1300 ${max}; do
		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		${ns_a} ip link del vti4_a
		if [ "${mtu}" != "${v}" ]; then
			err " vti MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}

test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti6 support first, so that the test can be skipped
	if ! ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10; then
		err " vti6 not supported"
		return 2
	fi
	${ns_a} ip link del vti6_a

	fail=0

	min=1280
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# This can fail, or MTU can be adjusted to a proper value
		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null || continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		if [ -z "${mtu}" ] || [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
			# Report the MTU the device ended up with, as the vti4
			# test does, not the value that was requested
			err " vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti6_a
	done

	# Now check valid values
	for v in ${min} 1300 ${max}; do
		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err " vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}

test_pmtu_vti6_link_change_mtu() {
	setup namespaces || return 2

	if ! ${ns_a} ip link add dummy0 mtu 1500 type dummy; then
		err " dummy not supported"
		return 2
	fi
	${ns_a} ip link add dummy1 mtu 3000 type dummy
	${ns_a} ip link set dummy0 up
	${ns_a} ip link set dummy1 up

	${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
	${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1

	fail=0

	# MTU values below are compared as strings, so that an MTU that can't
	# be read at all (empty value) is reported as a mismatch

	# Create vti6 interface bound to device, passing MTU, check it
	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1300" ]; then
		err " vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	fi

	# Move to another device with different MTU, without passing MTU, check
	# MTU is adjusted
	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "$((3000 - 40))" ]; then
		err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	fi

	# Move it back, passing MTU, check MTU is not overridden
	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	if [ "${mtu}" != "1280" ]; then
		err " vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	fi

	return ${fail}
}

trap cleanup EXIT

exitcode=0
# Split the list of tests on newlines only: each item is one full entry
IFS='
'
for entry in ${tests}; do
	# Each test runs in a subshell, so that variables set by one test
	# don't leak into the next one
	(
		unset IFS
		# Word splitting is intended: the first word is the test name,
		# the remaining ones form the description
		set -- ${entry}
		name="${1}"
		shift
		desc="${*}"

		eval test_${name}
		ret=$?
		cleanup

		case ${ret} in
		0)
			printf "TEST: %-60s [ OK ]\n" "${desc}"
			;;
		1)
			printf "TEST: %-60s [FAIL]\n" "${desc}"
			err_flush
			exit 1
			;;
		2)
			printf "TEST: %-60s [SKIP]\n" "${desc}"
			err_flush
			;;
		esac
	)
	[ $? -ne 0 ] && exitcode=1
done

exit ${exitcode}