1#!/bin/bash
2#
3# Test connection tracking zone and NAT source port reallocation support.
4#
5
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Number of client namespaces to create.
# Don't increase this too much: 2000 clients should work just fine,
# but the script can then take several minutes with KASAN/debug builds.
maxclients=100

# Set to 0 further down when 'iperf3 -v' cannot be run; the TCP part
# of the test is skipped in that case.
have_iperf=1
# Exit status of the script; set to 1 by any failing check.
ret=0
16
17# client1---.
18#            veth1-.
19#                  |
20#               NAT Gateway --veth0--> Server
21#                  | |
22#            veth2-' |
23# client2---'        |
24#  ....              |
25# clientX----vethX---'
26
27# All clients share identical IP address.
28# NAT Gateway uses policy routing and conntrack zones to isolate client
29# namespaces.  Each client connects to Server, each with colliding tuples:
30#   clientsaddr:10000 -> serveraddr:dport
31#   NAT Gateway is supposed to do port reallocation for each of the
32#   connections.
33
# Random suffix so that namespace names do not clash with anything
# that already exists on the host.
sfx=$(mktemp -u "XXXXXXXX")
gw="ns-gw-${sfx}"
cl1="ns-cl1-${sfx}"
cl2="ns-cl2-${sfx}"
srv="ns-srv-${sfx}"

# Print the current value of net.<family>.neigh.default.gc_thresh<level>.
# $1 - address family (ipv4 or ipv6), $2 - threshold level (1, 2 or 3).
# Prints nothing if the sysctl cannot be read.
neigh_gc_thresh()
{
	sysctl -n "net.$1.neigh.default.gc_thresh$2" 2>/dev/null
}

# Remember the host's neighbour GC thresholds; cleanup() restores them.
v4gc1=$(neigh_gc_thresh ipv4 1)
v4gc2=$(neigh_gc_thresh ipv4 2)
v4gc3=$(neigh_gc_thresh ipv4 3)
v6gc1=$(neigh_gc_thresh ipv6 1)
v6gc2=$(neigh_gc_thresh ipv6 2)
v6gc3=$(neigh_gc_thresh ipv6 3)
46
# Delete every namespace this test may have created and write the saved
# neighbour GC thresholds back to the host.
# Globals read: gw, srv, sfx, maxclients, v4gc1..3, v6gc1..3.
cleanup()
{
	local n fam lvl saved

	ip netns del "$gw"
	ip netns del "$srv"
	# Some client namespaces may not exist if the script exited early,
	# hence the silenced errors.
	for ((n = 1; n <= maxclients; n++)); do
		ip netns del "ns-cl${n}-${sfx}" 2>/dev/null
	done

	for fam in ipv4 ipv6; do
		for lvl in 1 2 3; do
			# v4gc1 .. v6gc3 hold the values read at startup.
			saved="v${fam#ipv}gc${lvl}"
			sysctl -q "net.${fam}.neigh.default.gc_thresh${lvl}=${!saved}" 2>/dev/null
		done
	done
}
62
# nft, ip and conntrack are mandatory: skip the whole test if any of
# them cannot be run.
if ! nft --version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without nft tool"
	exit $ksft_skip
fi

if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

if ! conntrack -V > /dev/null 2>&1; then
	echo "SKIP: Could not run test without conntrack tool"
	exit $ksft_skip
fi

# iperf3 is optional: without it only the ping part of the test runs.
iperf3 -v >/dev/null 2>&1 || have_iperf=0
85
# Gateway namespace first. The EXIT trap is only installed once it
# exists, so a failure here leaves nothing to clean up.
if ! ip netns add "$gw"; then
	echo "SKIP: Could not create net namespace $gw"
	exit $ksft_skip
fi
ip -net "$gw" link set lo up

trap cleanup EXIT

if ! ip netns add "$srv"; then
	echo "SKIP: Could not create server netns $srv"
	exit $ksft_skip
fi

# Gateway <-> server link: veth0 on the gateway side, eth0 in the server.
ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
ip -net "$gw" link set veth0 up
ip -net "$srv" link set lo up
ip -net "$srv" link set eth0 up
105
# Raise the neighbour table GC thresholds for both address families;
# the previous values were saved earlier and are restored by cleanup().
for fam in ipv6 ipv4; do
	sysctl -q "net.${fam}.neigh.default.gc_thresh1=512" 2>/dev/null
	sysctl -q "net.${fam}.neigh.default.gc_thresh2=1024" 2>/dev/null
	sysctl -q "net.${fam}.neigh.default.gc_thresh3=4096" 2>/dev/null
done
112
# One namespace per client, each linked to the gateway by its own veth
# pair: veth<i> on the gateway side, eth0 inside the client.
for i in $(seq 1 $maxclients); do
  cl="ns-cl$i-$sfx"

  ip netns add "$cl" || {
     echo "SKIP: Could not create client netns $cl"
     exit $ksft_skip
  }

  ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1 || {
    echo "SKIP: No virtual ethernet pair device support in kernel"
    exit $ksft_skip
  }
done
127
# Generate the per-client configuration as one command stream and hand
# it to a single 'ip -batch' invocation instead of running ip once per
# command.
#
# For each client i the stream contains, in order:
#  - link bring-up and sysctls for the client and for veth<i> on the gw
#  - client addressing: every client gets the same IP addresses
#  - gateway addressing: same addresses on every client-facing interface
#  - gw policy routing: table 1000+i, selected by fwmark i
for i in $(seq 1 $maxclients); do
  cl="ns-cl$i-$sfx"
  tbl=$((1000+i))
  cat <<EOF
netns exec $cl ip link set lo up
netns exec $cl ip link set eth0 up
netns exec $cl sysctl -q net.ipv4.tcp_syn_retries=2
netns exec $gw ip link set veth${i} up
netns exec $gw sysctl -q net.ipv4.conf.veth${i}.arp_ignore=2
netns exec $gw sysctl -q net.ipv4.conf.veth${i}.rp_filter=0
netns exec $cl ip addr add 10.1.0.3/24 dev eth0
netns exec $cl ip addr add dead:1::3/64 dev eth0
netns exec $cl ip route add default via 10.1.0.2 dev eth0
netns exec $cl ip route add default via dead:1::2 dev eth0
netns exec $gw ip addr add 10.1.0.2/24 dev veth${i}
netns exec $gw ip addr add dead:1::2/64 dev veth${i}
netns exec $gw ip route add 10.1.0.0/24 dev veth${i} table $tbl
netns exec $gw ip route add dead:1::0/64 dev veth${i} table $tbl
netns exec $gw ip route add 10.3.0.0/24 dev veth0 table $tbl
netns exec $gw ip route add dead:3::0/64 dev veth0 table $tbl
netns exec $gw ip rule add fwmark $i lookup $tbl
EOF
done | ip -batch /dev/stdin
154
# Server-facing addresses of the gateway (veth0) ...
for addr in 10.3.0.1/24 dead:3::1/64; do
	ip -net "$gw" addr add "$addr" dev veth0
done

# ... and the server's own addresses.
for addr in 10.3.0.99/24 dead:3::99/64; do
	ip -net "$srv" addr add "$addr" dev eth0
done
160
161ip netns exec $gw nft -f /dev/stdin<<EOF
162table inet raw {
163	map iiftomark {
164		type ifname : mark
165	}
166
167	map iiftozone {
168		typeof iifname : ct zone
169	}
170
171	set inicmp {
172		flags dynamic
173		type ipv4_addr . ifname . ipv4_addr
174	}
175	set inflows {
176		flags dynamic
177		type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
178	}
179
180	set inflows6 {
181		flags dynamic
182		type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
183	}
184
185	chain prerouting {
186		type filter hook prerouting priority -64000; policy accept;
187		ct original zone set meta iifname map @iiftozone
188		meta mark set meta iifname map @iiftomark
189
190		tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
191		add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
192		ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
193	}
194
195	chain nat_postrouting {
196		type nat hook postrouting priority 0; policy accept;
197                ct mark set meta mark meta oifname veth0 masquerade
198	}
199
200	chain mangle_prerouting {
201		type filter hook prerouting priority -100; policy accept;
202		ct direction reply meta mark set ct mark
203	}
204}
205EOF
206
# Fill both interface maps with "veth<i>" : <i> for every client, so
# that client i gets packet mark i and conntrack zone i.
# Every element but the last is followed by a comma; the last one
# closes the element list.
for map in iiftomark iiftozone; do
	printf 'add element inet raw %s {\n' "$map"
	for ((n = 1; n < maxclients; n++)); do
		printf '"veth%s" : %s,\n' "$n" "$n"
	done
	printf '"veth%s" : %s }\n' "$maxclients" "$maxclients"
done | ip netns exec $gw nft -f /dev/stdin
218
# Enable forwarding on the gateway for both address families and turn
# off reverse path filtering.
for knob in net.ipv4.conf.all.forwarding=1 \
	    net.ipv6.conf.all.forwarding=1 \
	    net.ipv4.conf.all.rp_filter=0; do
	ip netns exec "$gw" sysctl -q "$knob" > /dev/null
done

# Useful for debugging: allows using 'ping' from the clients to the gateway.
for knob in net.ipv4.fwmark_reflect=1 net.ipv6.fwmark_reflect=1; do
	ip netns exec "$gw" sysctl -q "$knob" > /dev/null
done
226
# Ping the server from every client namespace in parallel.
#
# The exit status of a backgrounded command is not available in $?
# right after 'cmd &' (that is always 0), so remember each PID and
# collect the real status with 'wait <pid>'.
ping_pids=()
for i in $(seq 1 $maxclients); do
  cl="ns-cl$i-$sfx"
  ip netns exec "$cl" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
  ping_pids[$i]=$!
done

# Reap every ping so that no background job is left behind, but only
# report the first client that failed to keep the output short.
ping_failed=""
for i in $(seq 1 $maxclients); do
  if ! wait "${ping_pids[$i]}" && [ -z "$ping_failed" ]; then
     ping_failed="ns-cl$i-$sfx"
  fi
done

if [ -n "$ping_failed" ]; then
  echo "FAIL: Ping failure from $ping_failed" 1>&2
  ret=1
fi
238
# Every client-facing interface must have counted exactly the three
# echo requests (252 bytes in total) sent by its client.  Stop at the
# first mismatch and dump the element that was actually found.
for i in $(seq 1 $maxclients); do
   elem="{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }"
   want="{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"

   if ! ip netns exec $gw nft get element inet raw inicmp "$elem" | grep -q "$want"; then
      ret=1
      echo "FAIL: counter icmp mismatch for veth$i" 1>&2
      ip netns exec $gw nft get element inet raw inicmp "$elem" 1>&2
      break
   fi
done
248
# All echo replies come back from the server on veth0, addressed to the
# gateway's veth0 address because of the masquerade rule.  The single
# veth0 element must therefore have counted the replies for every
# client: 3 packets / 252 bytes per client.
veth0_elem="{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }"
veth0_want="{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"

if ! ip netns exec $gw nft get element inet raw inicmp "$veth0_elem" | grep -q "$veth0_want"; then
    ret=1
    # Diagnostics go to stderr like every other FAIL message, and the
    # dump queries the same element that was just checked.
    echo "FAIL: counter icmp mismatch for veth0: $veth0_want" 1>&2
    ip netns exec $gw nft get element inet raw inicmp "$veth0_elem" 1>&2
fi

if  [ $ret -eq 0 ]; then
	echo "PASS: ping test from all $maxclients namespaces"
fi
259
# Without iperf3 the TCP part cannot run: report the ping result if it
# failed, otherwise mark the test as skipped.
if [ $have_iperf -eq 0 ]; then
	echo "SKIP: iperf3 not installed"
	[ $ret -eq 0 ] && exit $ksft_skip
	exit $ret
fi
267
# Start the iperf3 server in the background and give it a moment to
# come up before the clients connect.
ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
iperfpid=$!
sleep 1

# Connect from each client in turn, always from source port 10000, so
# that all connections have the same tuple from the client's point of
# view.  Nothing is attempted once a failure has been recorded, whether
# by this loop or by an earlier check.
for i in $(seq 1 $maxclients); do
  [ $ret -eq 0 ] || break

  cl="ns-cl$i-$sfx"
  if ! ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null; then
     echo FAIL: Failure to connect for $cl 1>&2
     ip netns exec $gw conntrack -S 1>&2
     ret=1
  fi
done

[ $ret -ne 0 ] || echo "PASS: iperf3 connections for all $maxclients net namespaces"

# Stop the server and reap it.
kill $iperfpid
wait
290
# Each client-facing interface must have recorded the client's flow
# with its original source port (10000) towards the iperf3 server port.
for i in $(seq 1 $maxclients); do
   flow="{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }"
   if ! ip netns exec $gw nft get element inet raw inflows "$flow" > /dev/null; then
      ret=1
      echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
      break
   fi
done

[ $ret -ne 0 ] || echo "PASS: Found client connection for all $maxclients net namespaces"

# The server's return traffic towards port 10000 of the gateway address
# must have been seen on veth0.
return_flow="{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }"
if ! ip netns exec $gw nft get element inet raw inflows "$return_flow" > /dev/null; then
    ret=1
    echo "FAIL: cannot find return entry on veth0" 1>&2
fi

exit $ret
310