#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Conntrack needs to reassemble fragments in order to have complete
# packets for rule matching. Reassembly can lead to packet loss.

# Consider the following setup:
#            +--------+       +---------+       +--------+
#            |Router A|-------|Wanrouter|-------|Router B|
#            |        |.IPIP..|         |..IPIP.|        |
#            +--------+       +---------+       +--------+
#           /                  mtu 1400                   \
#          /                                               \
#+--------+                                                 +--------+
#|Client A|                                                 |Client B|
#|        |                                                 |        |
#+--------+                                                 +--------+

# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
# on its interfaces.

# Random suffix so that namespace names do not clash with existing ones.
rnd=$(mktemp -u XXXXXXXX)

# File that captures whatever the UDP listener in Client B receives.
if ! rx=$(mktemp); then
	echo "SKIP: Could not create temporary file"
	exit $ksft_skip
fi

r_a="ns-ra-$rnd"
r_b="ns-rb-$rnd"
r_w="ns-rw-$rnd"
c_a="ns-ca-$rnd"
c_b="ns-cb-$rnd"

# Remove every namespace this script may have created and the capture file.
# Installed before the first tool check so that SKIP exits do not leak the
# temporary file; errors from "ip netns del" are silenced because some (or
# all) namespaces may not exist yet when an early exit happens.
cleanup() {
	local n

	for n in "$r_a" "$r_b" "$r_w" "$c_a" "$c_b"; do
		ip netns del "$n" 2>/dev/null
	done
	rm -f -- "$rx"
}

trap cleanup EXIT

# Run a probe command and skip the whole test if it fails.
# Arguments:
#   $1 - command line to run (intentionally word-split by the shell)
#   $2 - text appended to "SKIP: Could not " when the probe fails
checktool (){
	# $1 is deliberately unquoted: it carries a command plus its arguments.
	if ! $1 > /dev/null 2>&1; then
		echo "SKIP: Could not $2"
		exit $ksft_skip
	fi
}

checktool "iptables --version" "run test without iptables"
checktool "ip -Version" "run test without ip tool"
checktool "command -v socat" "run test without socat"
checktool "ip netns add ${r_a}" "create net namespace"

for n in "$r_b" "$r_w" "$c_a" "$c_b"; do
	ip netns add "$n"
done

# Send 1400-byte UDP datagrams from Client A to Client B and verify delivery.
# Arguments:
#   $1 - word inserted into the result message ("without" / "with")
# Outputs:
#   prints an OK or FAIL line; exits the script with status 1 on failure.
# The listener's output is captured in $rx and exactly 1400 bytes are
# expected there, although the sender is run three times.
test_path() {
	local msg="$1"
	local listener_pid bytes i

	ip netns exec "$c_b" socat -t 3 - udp4-listen:5000,reuseaddr > "$rx" < /dev/null &
	listener_pid=$!

	# Give the listener time to bind before sending.
	sleep 1
	for i in 1 2 3; do
		head -c1400 /dev/zero | tr "\000" "a" | \
			ip netns exec "$c_a" socat -t 1 -u STDIN UDP:192.168.20.2:5000
	done

	wait "$listener_pid"

	bytes=$(wc -c < "$rx")

	if [ "$bytes" -eq 1400 ]; then
		echo "OK: PMTU $msg connection tracking"
	else
		echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
		exit 1
	fi
}

# Detailed setup for Router A
# ---------------------------
# Interfaces:
# veth0: 10.2.2.1/24
# veth1: 192.168.10.1/24
# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
# Routes:
# 192.168.20.0/24 dev ipip0    (192.168.20.0/24 is subnet of Client B)
# 10.4.4.0/24 via 10.2.2.254   (Router B via Wanrouter)
# No iptables rules at all.

ip link add veth0 netns "$r_a" type veth peer name veth0 netns "$r_w"
ip link add veth1 netns "$r_a" type veth peer name veth0 netns "$c_a"

l_addr="10.2.2.1"
r_addr="10.4.4.1"
# Skip (rather than fail) when the ipip tunnel device cannot be created.
ip netns exec "$r_a" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip

for dev in lo veth0 veth1 ipip0; do
	ip -net "$r_a" link set "$dev" up
done

ip -net "$r_a" addr add 10.2.2.1/24 dev veth0
ip -net "$r_a" addr add 192.168.10.1/24 dev veth1

ip -net "$r_a" route add 192.168.20.0/24 dev ipip0
ip -net "$r_a" route add 10.4.4.0/24 via 10.2.2.254

ip netns exec "$r_a" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

# Detailed setup for Router B
# ---------------------------
# Interfaces:
# veth0: 10.4.4.1/24
# veth1: 192.168.20.1/24
# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
# Routes:
# 192.168.10.0/24 dev ipip0    (192.168.10.0/24 is subnet of Client A)
# 10.2.2.0/24 via 10.4.4.254   (Router A via Wanrouter)
# No iptables rules at all.

ip link add veth0 netns "$r_b" type veth peer name veth1 netns "$r_w"
ip link add veth1 netns "$r_b" type veth peer name veth0 netns "$c_b"

l_addr="10.4.4.1"
r_addr="10.2.2.1"

# Skip (rather than fail) when the ipip tunnel device cannot be created.
ip netns exec "$r_b" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip

for dev in lo veth0 veth1 ipip0; do
	ip -net "$r_b" link set "$dev" up
done

ip -net "$r_b" addr add 10.4.4.1/24 dev veth0
ip -net "$r_b" addr add 192.168.20.1/24 dev veth1

ip -net "$r_b" route add 192.168.10.0/24 dev ipip0
ip -net "$r_b" route add 10.2.2.0/24 via 10.4.4.254
ip netns exec "$r_b" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

# Client A
ip -net "$c_a" addr add 192.168.10.2/24 dev veth0
ip -net "$c_a" link set dev lo up
ip -net "$c_a" link set dev veth0 up
ip -net "$c_a" route add default via 192.168.10.1

# Client B
ip -net "$c_b" addr add 192.168.20.2/24 dev veth0
ip -net "$c_b" link set dev veth0 up
ip -net "$c_b" link set dev lo up
ip -net "$c_b" route add default via 192.168.20.1

# Wan
ip -net "$r_w" addr add 10.2.2.254/24 dev veth0
ip -net "$r_w" addr add 10.4.4.254/24 dev veth1

ip -net "$r_w" link set dev lo up
ip -net "$r_w" link set dev veth0 up mtu 1400
ip -net "$r_w" link set dev veth1 up mtu 1400

# The router ends of the WAN links get the same MTU as the Wanrouter ends.
ip -net "$r_a" link set dev veth0 mtu 1400
ip -net "$r_b" link set dev veth0 mtu 1400

ip netns exec "$r_w" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

# Path MTU discovery
# ------------------
# Running tracepath from Client A to Client B shows PMTU discovery is working
# as expected:
#
# clienta:~# tracepath 192.168.20.2
#  1?: [LOCALHOST]                      pmtu 1500
#  1:  192.168.10.1                                          0.867ms
#  1:  192.168.10.1                                          0.302ms
#  2:  192.168.10.1                                          0.312ms pmtu 1480
#  2:  no reply
#  3:  192.168.10.1                                          0.510ms pmtu 1380
#  3:  192.168.20.2                                          2.320ms reached
#      Resume: pmtu 1380 hops 3 back 3

# ip netns exec ${c_a} traceroute --mtu 192.168.20.2

# Router A has learned PMTU (1400) to Router B from Wanrouter.
# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
# from Router A.

# Send large UDP packet
# ---------------------
# Now we send a 1400 bytes UDP packet from Client A to Client B:

# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
test_path "without"

# The IPv4 stack on Client A already knows the PMTU to Client B, so the
# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
# fragments between veth1 and ipip0. The fragments fit into the tunnel and
# reach their destination.

# When sending the large UDP packet again, Router A now reassembles the
# fragments before routing the packet over ipip0. The resulting IPIP
# packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
# dropped on Router A before sending.

# Adding any conntrack match is what enables connection tracking (and thus
# fragment reassembly) on Router A; the rule itself has no target.
ip netns exec "$r_a" iptables -A FORWARD -m conntrack --ctstate NEW
test_path "with"