1#!/bin/sh
2
3# This script demonstrates interaction of conntrack and vrf.
4# The vrf driver calls the netfilter hooks again, with oif/iif
5# pointing at the VRF device.
6#
7# For ingress, this means first iteration has iifname of lower/real
# device.  In this script, that's veth0.
9# Second iteration is iifname set to vrf device, tvrf in this script.
10#
11# For egress, this is reversed: first iteration has the vrf device,
12# second iteration is done with the lower/real/veth0 device.
13#
# test_ct_zone_in demonstrates an unexpected change of nftables
# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
# connection on VRF rcv"
17#
18# It was possible to assign conntrack zone to a packet (or mark it for
19# `notracking`) in the prerouting chain before conntrack, based on real iif.
20#
# After the change, the zone assignment made for the real interface is lost.
# Instead, the zone is assigned based on the `iif` matching the VRF master
# interface (in case such a rule exists).
# Thus it is impossible to distinguish packets based on the original
# interface.
26#
# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
28# that was supposed to be fixed by the commit mentioned above to make sure
29# that any fix to test case 1 won't break masquerade again.
30
# Exit status used when the test has to be skipped.
ksft_skip=4

# Overall result of the script; test cases set it to 1 to record a failure.
ret=0

# Addresses assigned to the two ends of the veth pair, and their prefix length.
PFXL=30
IP0=172.30.30.1
IP1=172.30.30.2

# Random suffix so the namespace names do not clash with existing ones.
sfx="$(mktemp -u XXXXXXXX)"
ns0="ns0-${sfx}"
ns1="ns1-${sfx}"
41
# Kill every process still running inside the two test namespaces, then
# remove the namespaces.  Installed as the EXIT trap of this script.
#
# Globals: ns0, ns1 (read)
cleanup()
{
	local ns

	for ns in "$ns0" "$ns1"; do
		# kill complains when a namespace has no processes left; ignore that.
		ip netns pids "$ns" | xargs kill 2>/dev/null
	done

	# "ip netns del" acts on a single name, so each namespace needs
	# its own invocation or the second one is left behind.
	for ns in "$ns0" "$ns1"; do
		ip netns del "$ns"
	done
}
49
# Skip the whole test unless every external tool used below is available.
if ! nft --version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without nft tool"
	exit $ksft_skip
fi

if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

if ! conntrack -V > /dev/null 2>&1; then
	echo "SKIP: Could not run test without conntrack tool"
	exit $ksft_skip
fi

# The server is started in the background further down, and "$?" after a
# background launch is always 0, so a missing iperf3 must be caught here.
if ! command -v iperf3 > /dev/null 2>&1; then
	echo "SKIP: Could not start iperf3"
	exit $ksft_skip
fi

if ! ip netns add "$ns0"; then
	echo "SKIP: Could not create net namespace $ns0"
	exit $ksft_skip
fi
if ! ip netns add "$ns1"; then
	echo "SKIP: Could not create net namespace $ns1"
	# The EXIT trap is not installed yet, so remove ns0 by hand.
	ip netns del "$ns0"
	exit $ksft_skip
fi

# From here on both namespaces exist; let cleanup() remove them on any exit.
trap cleanup EXIT

ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0

# veth pair connecting the two namespaces; both ends are named veth0.
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: Could not add veth device"
	exit $ksft_skip
fi

if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
	echo "SKIP: Could not add vrf device"
	exit $ksft_skip
fi

ip -net "$ns0" li set lo up

# Enslave the ns0 end of the veth pair to the vrf device.
ip -net "$ns0" li set veth0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
ip -net "$ns1" li set veth0 up

ip -net "$ns0" addr add "$IP0/$PFXL" dev veth0
ip -net "$ns1" addr add "$IP1/$PFXL" dev veth0

# Server side for the masquerade tests; killed again by cleanup().
ip netns exec "$ns1" iperf3 -s > /dev/null 2>&1 &
102
# Test vrf ingress handling.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
#
# Loads a raw-priority ruleset into ns0 that assigns zone 1 to packets
# matching veth0 and zone 2 to packets matching tvrf, pings IP0 from ns1
# and then looks for the resulting icmp entry in the conntrack table.
#
# Globals: ns0, ns1, IP0, IP1 (read); ret (set to 1 on failure)
# Outputs: one PASS line, or FAIL lines plus a table dump, on stdout
test_ct_zone_in()
{
	local count

ip netns exec "$ns0" nft -f - <<EOF
table testct {
	chain rawpre {
		type filter hook prerouting priority raw;

		iif { veth0, tvrf } counter meta nftrace set 1
		iif veth0 counter ct zone set 1 counter return
		iif tvrf counter ct zone set 2 counter return
		ip protocol icmp counter
		notrack counter
	}

	chain rawout {
		type filter hook output priority raw;

		oif veth0 counter ct zone set 1 counter return
		oif tvrf counter ct zone set 2 counter return
		notrack counter
	}
}
EOF
	ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null

	# should be in zone 1, not zone 2
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 1 2>/dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "PASS: entry found in conntrack zone 1"
		return
	fi

	echo "FAIL: entry not found in conntrack zone 1"
	# Record the failure so that the script's exit status reflects it.
	ret=1

	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 2 2> /dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "FAIL: entry found in zone 2 instead"
	else
		echo "FAIL: entry not in zone 1 or 2, dumping table"
		ip netns exec "$ns0" conntrack -L
		ip netns exec "$ns0" nft list ruleset
	fi
}
147
# Add masq rule that gets evaluated w. outif set to vrf device.
# This tests the first iteration of the packet through conntrack,
# oifname is the vrf device.
#
# Arguments: $1 - qdisc to attach to tvrf for the duration of the test,
#                 or "default" to leave the device's qdisc alone
# Globals:   ns0, IP1 (read); ret (set to 1 on failure)
# Outputs:   one PASS or FAIL line on stdout
test_masquerade_vrf()
{
	local qdisc="$1"

	if [ "$qdisc" != "default" ]; then
		# $qdisc is left unquoted on purpose so that a qdisc name
		# followed by parameters keeps working.
		tc -net "$ns0" qdisc add dev tvrf root $qdisc
	fi

	ip netns exec "$ns0" conntrack -F 2>/dev/null

ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain rawout {
		type filter hook output priority raw;

		oif tvrf ct state untracked counter
	}
	chain postrouting2 {
		type filter hook postrouting priority mangle;

		oif tvrf ct state untracked counter
	}
	chain postrouting {
		type nat hook postrouting priority 0;
		# NB: masquerade should always be combined with 'oif(name) bla',
		# lack of this is intentional here, we want to exercise double-snat.
		ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	# No early return below: the qdisc added above has to be removed on
	# every path, otherwise it stays on tvrf for the following tests.
	if ! ip netns exec "$ns0" ip vrf exec tvrf iperf3 -t 1 -c "$IP1" >/dev/null; then
		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
		ret=1
	# must also check that nat table was evaluated on second (lower device) iteration.
	elif ip netns exec "$ns0" nft list table ip nat | grep -q 'counter packets 2' &&
	     ip netns exec "$ns0" nft list table ip nat | grep -q 'untracked counter packets [1-9]'; then
		echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
	else
		echo "FAIL: vrf rules have unexpected counter value"
		ret=1
	fi

	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc del dev tvrf root
	fi
}
203
# Add masq rule that gets evaluated w. outif set to veth device.
# This tests the 2nd iteration of the packet through conntrack,
# oifname is the lower device (veth0 in this case).
#
# Globals: ns0, IP1 (read); ret (set to 1 on failure)
# Outputs: one PASS or FAIL line on stdout
test_masquerade_veth()
{
	# Start from an empty conntrack table and a fresh ruleset.
	ip netns exec $ns0 conntrack -F 2>/dev/null
ip netns exec $ns0 nft -f - <<EOF
flush ruleset
table ip nat {
	chain postrouting {
		type nat hook postrouting priority 0;
		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null; then
		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
		ret=1
		return
	fi

	# The nat rule must also have been evaluated on the second
	# (lower device) iteration, which shows up in its packet counter.
	case "$(ip netns exec $ns0 nft list table ip nat)" in
	*'counter packets 2'*)
		echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
		;;
	*)
		echo "FAIL: vrf masq rule has unexpected counter value"
		ret=1
		;;
	esac
}
235
# Run the test cases in order: the ingress zone test first, then the vrf
# masquerade test once per qdisc variant, then the veth masquerade test.
test_ct_zone_in
for qdisc_variant in "default" "pfifo"; do
	test_masquerade_vrf "$qdisc_variant"
done
test_masquerade_veth

# Exit with the result accumulated in ret.
exit "$ret"
242