#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
# Creates following topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0

# Temporary payload/result files; created later with mktemp and removed
# by cleanup().
ns1in=""
ns2in=""
ns1out=""
ns2out=""

# Remember the current value so cleanup() can restore it.
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

if ! nft --version > /dev/null 2>&1; then
  echo "SKIP: Could not run test without nft tool"
  exit $ksft_skip
fi

if ! ip -Version > /dev/null 2>&1; then
  echo "SKIP: Could not run test without ip tool"
  exit $ksft_skip
fi

# 'command -v' is a shell builtin; 'which' is an optional external tool.
if ! command -v nc > /dev/null 2>&1; then
  echo "SKIP: Could not run test without nc (netcat)"
  exit $ksft_skip
fi

if ! ip netns add nsr1; then
  echo "SKIP: Could not create net namespace"
  exit $ksft_skip
fi

ip netns add ns1
ip netns add ns2

ip netns add nsr2

# Runs on every exit path (see trap below): deletes the four namespaces,
# removes the temporary files and restores nf_log_all_netns if it was 0
# when the script started.
cleanup() {
  local i

  for i in 1 2; do
    ip netns del "ns$i"
    ip netns del "nsr$i"
  done

  rm -f "$ns1in" "$ns1out"
  rm -f "$ns2in" "$ns2out"

  # String comparison on purpose: log_netns is empty when the sysctl
  # could not be read, and a numeric test would then print an error.
  if [ "$log_netns" = "0" ]; then
    sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
  fi
}

trap cleanup EXIT

sysctl -q net.netfilter.nf_log_all_netns=1

ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2

ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2

for dev in lo veth0 veth1; do
  for i in 1 2; do
    ip -net "nsr$i" link set "$dev" up
  done
done

ip -net nsr1 addr add 10.0.1.1/24 dev veth0
ip -net nsr1 addr add dead:1::1/64 dev veth0

ip -net nsr2 addr add 10.0.2.1/24 dev veth1
ip -net nsr2 addr add dead:2::1/64 dev veth1

# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
# or to do PMTU discovery (send ICMP error back to originator).
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.

ip -net nsr1 link set veth0 mtu 9000
ip -net ns1 link set eth0 mtu 9000

ip -net nsr2 link set veth1 mtu 2000
ip -net ns2 link set eth0 mtu 2000

# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net nsr1 addr add 192.168.10.1/24 dev veth1
ip -net nsr1 addr add fee1:2::1/64 dev veth1

ip -net nsr2 addr add 192.168.10.2/24 dev veth0
ip -net nsr2 addr add fee1:2::2/64 dev veth0

for i in 1 2; do
  ip netns exec "nsr$i" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
  ip netns exec "nsr$i" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null

  ip -net "ns$i" link set lo up
  ip -net "ns$i" link set eth0 up
  ip -net "ns$i" addr add "10.0.$i.99/24" dev eth0
  ip -net "ns$i" route add default via "10.0.$i.1"
  ip -net "ns$i" addr add "dead:$i::99/64" dev eth0
  ip -net "ns$i" route add default via "dead:$i::1"
  ip netns exec "ns$i" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null

  # don't set ip DF bit for first two tests
  ip netns exec "ns$i" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

ip -net nsr1 route add default via 192.168.10.2
ip -net nsr2 route add default via 192.168.10.1

ip netns exec nsr1 nft -f - <<EOF
table inet filter {
  flowtable f1 {
     hook ingress priority 0
     devices = { veth0, veth1 }
   }

   chain forward {
      type filter hook forward priority 0; policy drop;

      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
      meta oif "veth1" tcp dport 12345 flow offload @f1 counter

      # use packet size to trigger 'should be offloaded by now'.
      # otherwise, if 'flow offload' expression never offloads, the
      # test will pass.
      tcp dport 12345 meta length gt 200 ct mark set 1 counter

      # this turns off flow offloading internally, so expect packets again
      tcp flags fin,rst ct mark set 0 accept

      # this allows large packets from responder, we need this as long
      # as PMTUd is off.
      # This rule is deleted for the last test, when we expect PMTUd
      # to kick in and ensure all packets meet mtu requirements.
      meta length gt 1500 accept comment something-to-grep-for

      # next line blocks connection w.o. working offload.
      # we only do this for reverse dir, because we expect packets to
      # enter slow path due to MTU mismatch of veth0 and veth1.
      tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop

      ct state established,related accept

      # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
      meta length lt 200 oif "veth1" tcp dport 12345 counter accept

      meta nfproto ipv4 meta l4proto icmp accept
      meta nfproto ipv6 meta l4proto icmpv6 accept
   }
}
EOF

if [ $? -ne 0 ]; then
  echo "SKIP: Could not load nft ruleset"
  exit $ksft_skip
fi

# test basic connectivity
# Fail fast here: dropping into an interactive shell would hang
# unattended test runs.
if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
  echo "ERROR: ns1 cannot reach ns2" 1>&2
  exit 1
fi

if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
  echo "ERROR: ns2 cannot reach ns1" 1>&2
  exit 1
fi

if [ "$ret" -eq 0 ]; then
  echo "PASS: netns routing/connectivity: ns1 can reach ns2"
fi

ns1in=$(mktemp)
ns1out=$(mktemp)
ns2in=$(mktemp)
ns2out=$(mktemp)

# Fill a file with random data: a random number (0..8191) of 1 KiB blocks
# followed by a tail of 128..1151 single bytes, so the size is never zero
# and usually not a multiple of the block size.
# Arguments: $1 - file to (over)write
#            $2 - label of the owner; accepted but not used
make_file()
{
  local name=$1
  local kblocks tail_bytes

  kblocks=$((RANDOM % (1024 * 8)))
  dd if=/dev/urandom of="$name" bs=1024 count="$kblocks" 2> /dev/null

  tail_bytes=$((RANDOM % 1024 + 128))
  # Append via the shell redirect so the first part is kept; dd opened
  # with of= would truncate (or, with conv=notrunc, overwrite the start).
  dd if=/dev/urandom bs=1 count="$tail_bytes" 2> /dev/null >> "$name"
}

# Compare a sent file with what the peer received.
# Arguments: $1 - file that was sent
#            $2 - file that was received
#            $3 - description used in the failure message
# Returns:   0 if both files are identical, 1 otherwise (after listing both).
check_transfer()
{
  local sent=$1
  local received=$2
  local what=$3

  if ! cmp "$sent" "$received" > /dev/null 2>&1; then
    echo "FAIL: file mismatch for $what" 1>&2
    ls -l "$sent"
    ls -l "$received"
    return 1
  fi

  return 0
}

# Run one bidirectional TCP transfer through the routers using nc.
# The listener always binds port 12345; $4 may differ when the router
# rewrites the destination (DNAT).
# Globals:   ns1in, ns2in (read), ns1out, ns2out (written)
# Arguments: $1 - client namespace
#            $2 - server namespace
#            $3 - destination address the client connects to
#            $4 - destination port the client connects to
# Returns:   0 if both directions transferred intact, 1 otherwise.
test_tcp_forwarding_ip()
{
  local nsa=$1
  local nsb=$2
  local dstip=$3
  local dstport=$4
  local lret=0
  local lpid cpid

  ip netns exec "$nsb" nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
  lpid=$!

  # give the listener time to bind before the client connects
  sleep 1
  ip netns exec "$nsa" nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
  cpid=$!

  sleep 3

  kill "$lpid"
  kill "$cpid"
  wait

  if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
    lret=1
  fi

  if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
    lret=1
  fi

  return $lret
}

# Plain forwarding test: connect to the responder's real address.
# Arguments: $1 - client namespace, $2 - server namespace
test_tcp_forwarding()
{
  test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345

  return $?
}

# NAT test: first the real address (exercises masquerade), then, only if
# that worked, the 10.6.6.6:1666 address that the router DNATs to the
# responder.
# Arguments: $1 - client namespace, $2 - server namespace
test_tcp_forwarding_nat()
{
  local lret

  test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
  lret=$?

  if [ $lret -eq 0 ]; then
    test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
    lret=$?
  fi

  return $lret
}

make_file "$ns1in" "ns1"
make_file "$ns2in" "ns2"

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
if test_tcp_forwarding ns1 ns2; then
  echo "PASS: flow offloaded for ns1/ns2"
else
  echo "FAIL: flow offload for ns1/ns2:" 1>&2
  ip netns exec nsr1 nft list ruleset
  ret=1
fi

# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
ip -net ns2 route del default via 10.0.2.1
ip -net ns2 route del default via dead:2::1
ip -net ns2 route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same, but with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if test_tcp_forwarding_nat ns1 ns2; then
  echo "PASS: flow offloaded for ns1/ns2 with NAT"
else
  echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
  ip netns exec nsr1 nft list ruleset
  ret=1
fi

# Third test:
# Same as second test, but with PMTU discovery enabled.
handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)

# $handle holds the two words "handle <N>" and must be word-split.
# shellcheck disable=SC2086
if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
  echo "FAIL: Could not delete large-packet accept rule"
  exit 1
fi

ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

if test_tcp_forwarding_nat ns1 ns2; then
  echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
  echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
  ip netns exec nsr1 nft list ruleset
  # a reported failure must be reflected in the exit status
  ret=1
fi

# Throw-away keys for the test tunnel, derived from the process list.
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
SPI2=$RANDOM

# the two directions need distinct SPIs
if [ "$SPI1" -eq "$SPI2" ]; then
  SPI2=$((SPI2+1))
fi

# Configure one end of an ESP tunnel-mode association between the routers.
# Globals:   KEY_AES, KEY_SHA (read)
# Arguments: $1 - namespace to configure
#            $2 - local tunnel endpoint address
#            $3 - remote tunnel endpoint address
#            $4 - local protected network
#            $5 - remote protected network
#            $6 - SPI for outgoing traffic
#            $7 - SPI for incoming traffic
do_esp() {
  local ns=$1
  local me=$2
  local remote=$3
  local lnet=$4
  local rnet=$5
  local spi_out=$6
  local spi_in=$7

  ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
  ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"

  # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
  ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
  # to fwd decrypted packets after esp processing:
  ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
}

do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"

do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

ip netns exec nsr1 nft delete table ip nat

# restore default routes
ip -net ns2 route del 192.168.10.1 via 10.0.2.1
ip -net ns2 route add default via 10.0.2.1
ip -net ns2 route add default via dead:2::1

if test_tcp_forwarding ns1 ns2; then
  echo "PASS: ipsec tunnel mode for ns1/ns2"
else
  echo "FAIL: ipsec tunnel mode for ns1/ns2"
  ip netns exec nsr1 nft list ruleset 1>&2
  ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
  # a reported failure must be reflected in the exit status
  ret=1
fi

exit $ret