xref: /openbmc/linux/net/netfilter/nft_inner.c (revision 278002edb19bce2c628fafb0af936e77000f3a5b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/if_vlan.h>
8 #include <linux/init.h>
9 #include <linux/module.h>
10 #include <linux/netlink.h>
11 #include <linux/netfilter.h>
12 #include <linux/netfilter/nf_tables.h>
13 #include <net/netfilter/nf_tables_core.h>
14 #include <net/netfilter/nf_tables.h>
15 #include <net/netfilter/nft_meta.h>
16 #include <net/netfilter/nf_tables_offload.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <net/gre.h>
20 #include <net/geneve.h>
21 #include <net/ip.h>
22 #include <linux/icmpv6.h>
23 #include <linux/ip.h>
24 #include <linux/ipv6.h>
25 
/* Per-CPU cache of the most recently parsed inner tunnel context; the
 * cookie (the skb pointer) decides whether a cached entry matches the
 * packet currently being evaluated.
 */
static DEFINE_PER_CPU(struct nft_inner_tun_ctx, nft_pcpu_tun_ctx);
27 
/* Same layout as nft_expr but it embeds the private expression data area. */
struct __nft_expr {
	const struct nft_expr_ops	*ops;
	union {
		struct nft_payload	payload;	/* used when expr_type == NFT_INNER_EXPR_PAYLOAD */
		struct nft_meta		meta;		/* used when expr_type == NFT_INNER_EXPR_META */
	} __attribute__((aligned(__alignof__(u64))));
};
36 
/* Discriminator for which expression is embedded in struct __nft_expr. */
enum {
	NFT_INNER_EXPR_PAYLOAD,
	NFT_INNER_EXPR_META,
};
41 
/* Private data area of the "inner" expression. */
struct nft_inner {
	u8			flags;		/* NFT_INNER_* flags from NFTA_INNER_FLAGS */
	u8			hdrsize;	/* tunnel header size in bytes (1..64) */
	u8			type;		/* tunnel type from NFTA_INNER_TYPE, e.g. NFT_INNER_GENEVE */
	u8			expr_type;	/* NFT_INNER_EXPR_PAYLOAD or NFT_INNER_EXPR_META */

	struct __nft_expr	expr;		/* embedded payload/meta expression */
};
50 
/* Parse the inner link-layer header (optional, NFT_INNER_LL) and network
 * header of the tunnelled packet starting at offset @off, recording the
 * offsets of the headers found in @ctx together with the matching
 * NFT_PAYLOAD_CTX_INNER_* flags.  Returns 0 on success, -1 when a header
 * is truncated, malformed or of an unsupported protocol.
 */
static int nft_inner_parse_l2l3(const struct nft_inner *priv,
				const struct nft_pktinfo *pkt,
				struct nft_inner_tun_ctx *ctx, u32 off)
{
	__be16 llproto, outer_llproto;
	u32 nhoff, thoff;

	if (priv->flags & NFT_INNER_LL) {
		struct vlan_ethhdr *veth, _veth;
		struct ethhdr *eth, _eth;
		u32 hdrsize;

		eth = skb_header_pointer(pkt->skb, off, sizeof(_eth), &_eth);
		if (!eth)
			return -1;

		switch (eth->h_proto) {
		case htons(ETH_P_IP):
		case htons(ETH_P_IPV6):
			llproto = eth->h_proto;
			hdrsize = sizeof(_eth);
			break;
		case htons(ETH_P_8021Q):
			/* Single 802.1Q tag: re-read the header as a
			 * vlan_ethhdr to also pick up the encapsulated
			 * protocol.
			 */
			veth = skb_header_pointer(pkt->skb, off, sizeof(_veth), &_veth);
			if (!veth)
				return -1;

			outer_llproto = veth->h_vlan_encapsulated_proto;
			llproto = veth->h_vlan_proto;
			hdrsize = sizeof(_veth);
			break;
		default:
			return -1;
		}

		ctx->inner_lloff = off;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL;
		off += hdrsize;
	} else {
		/* No link layer present: derive the network protocol from
		 * the IP version nibble in the first byte.
		 */
		struct iphdr *iph;
		u32 _version;

		iph = skb_header_pointer(pkt->skb, off, sizeof(_version), &_version);
		if (!iph)
			return -1;

		switch (iph->version) {
		case 4:
			llproto = htons(ETH_P_IP);
			break;
		case 6:
			llproto = htons(ETH_P_IPV6);
			break;
		default:
			return -1;
		}
	}

	ctx->llproto = llproto;
	/* For a VLAN-tagged frame, continue network parsing with the
	 * encapsulated protocol; outer_llproto is only set on the
	 * ETH_P_8021Q path above, the sole path that makes llproto
	 * ETH_P_8021Q here.
	 */
	if (llproto == htons(ETH_P_8021Q))
		llproto = outer_llproto;

	nhoff = off;

	switch (llproto) {
	case htons(ETH_P_IP): {
		struct iphdr *iph, _iph;

		iph = skb_header_pointer(pkt->skb, nhoff, sizeof(_iph), &_iph);
		if (!iph)
			return -1;

		if (iph->ihl < 5 || iph->version != 4)
			return -1;

		ctx->inner_nhoff = nhoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH;

		thoff = nhoff + (iph->ihl * 4);
		/* Only the first fragment carries the transport header. */
		if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) {
			ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
			ctx->inner_thoff = thoff;
			ctx->l4proto = iph->protocol;
		}
		}
		break;
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h, _ip6h;
		int fh_flags = IP6_FH_F_AUTH;
		unsigned short fragoff;
		int l4proto;

		ip6h = skb_header_pointer(pkt->skb, nhoff, sizeof(_ip6h), &_ip6h);
		if (!ip6h)
			return -1;

		if (ip6h->version != 6)
			return -1;

		ctx->inner_nhoff = nhoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH;

		/* Walk the extension header chain to find the transport
		 * protocol and fragmentation state.
		 */
		thoff = nhoff;
		l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags);
		if (l4proto < 0 || thoff > U16_MAX)
			return -1;

		if (fragoff == 0) {
			/* NOTE(review): this overwrites the transport offset
			 * computed by ipv6_find_hdr() with the offset right
			 * past the fixed IPv6 header, which looks incorrect
			 * when extension headers are present -- confirm
			 * against upstream intent before changing.
			 */
			thoff = nhoff + sizeof(_ip6h);
			ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
			ctx->inner_thoff = thoff;
			ctx->l4proto = l4proto;
		}
		}
		break;
	default:
		return -1;
	}

	return 0;
}
172 
/* Record the tunnel header offset in @ctx and advance @off past the tunnel
 * header so inner header parsing can continue from there.  For GRE the
 * transport header offset itself is used and @off is left untouched.
 * Returns 0 on success, -1 on a transport protocol other than GRE/UDP or a
 * truncated geneve header.
 */
static int nft_inner_parse_tunhdr(const struct nft_inner *priv,
				  const struct nft_pktinfo *pkt,
				  struct nft_inner_tun_ctx *ctx, u32 *off)
{
	if (pkt->tprot == IPPROTO_GRE) {
		ctx->inner_tunoff = pkt->thoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN;
		return 0;
	}

	if (pkt->tprot != IPPROTO_UDP)
		return -1;

	ctx->inner_tunoff = *off;
	ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN;
	*off += priv->hdrsize;

	switch (priv->type) {
	case NFT_INNER_GENEVE: {
		struct genevehdr *gnvh, _gnvh;

		gnvh = skb_header_pointer(pkt->skb, pkt->inneroff,
					  sizeof(_gnvh), &_gnvh);
		if (!gnvh)
			return -1;

		/* Skip the variable-length geneve options; opt_len is in
		 * units of 4 bytes.
		 */
		*off += gnvh->opt_len * 4;
		}
		break;
	default:
		break;
	}

	return 0;
}
208 
nft_inner_parse(const struct nft_inner * priv,struct nft_pktinfo * pkt,struct nft_inner_tun_ctx * tun_ctx)209 static int nft_inner_parse(const struct nft_inner *priv,
210 			   struct nft_pktinfo *pkt,
211 			   struct nft_inner_tun_ctx *tun_ctx)
212 {
213 	u32 off = pkt->inneroff;
214 
215 	if (priv->flags & NFT_INNER_HDRSIZE &&
216 	    nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0)
217 		return -1;
218 
219 	if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) {
220 		if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0)
221 			return -1;
222 	} else if (priv->flags & NFT_INNER_TH) {
223 		tun_ctx->inner_thoff = off;
224 		tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
225 	}
226 
227 	tun_ctx->type = priv->type;
228 	tun_ctx->cookie = (unsigned long)pkt->skb;
229 	pkt->flags |= NFT_PKTINFO_INNER_FULL;
230 
231 	return 0;
232 }
233 
nft_inner_restore_tun_ctx(const struct nft_pktinfo * pkt,struct nft_inner_tun_ctx * tun_ctx)234 static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
235 				      struct nft_inner_tun_ctx *tun_ctx)
236 {
237 	struct nft_inner_tun_ctx *this_cpu_tun_ctx;
238 
239 	local_bh_disable();
240 	this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
241 	if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) {
242 		local_bh_enable();
243 		return false;
244 	}
245 	*tun_ctx = *this_cpu_tun_ctx;
246 	local_bh_enable();
247 
248 	return true;
249 }
250 
nft_inner_save_tun_ctx(const struct nft_pktinfo * pkt,const struct nft_inner_tun_ctx * tun_ctx)251 static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt,
252 				   const struct nft_inner_tun_ctx *tun_ctx)
253 {
254 	struct nft_inner_tun_ctx *this_cpu_tun_ctx;
255 
256 	local_bh_disable();
257 	this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
258 	if (this_cpu_tun_ctx->cookie != tun_ctx->cookie)
259 		*this_cpu_tun_ctx = *tun_ctx;
260 	local_bh_enable();
261 }
262 
nft_inner_parse_needed(const struct nft_inner * priv,const struct nft_pktinfo * pkt,struct nft_inner_tun_ctx * tun_ctx)263 static bool nft_inner_parse_needed(const struct nft_inner *priv,
264 				   const struct nft_pktinfo *pkt,
265 				   struct nft_inner_tun_ctx *tun_ctx)
266 {
267 	if (!(pkt->flags & NFT_PKTINFO_INNER_FULL))
268 		return true;
269 
270 	if (!nft_inner_restore_tun_ctx(pkt, tun_ctx))
271 		return true;
272 
273 	if (priv->type != tun_ctx->type)
274 		return true;
275 
276 	return false;
277 }
278 
/* Evaluate the embedded payload/meta expression against the inner packet
 * headers, parsing them first unless a valid context for this skb is
 * already cached.  Any parse or evaluation error aborts the rule with
 * NFT_BREAK.
 */
static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
			   const struct nft_pktinfo *pkt)
{
	const struct nft_inner *priv = nft_expr_priv(expr);
	struct nft_inner_tun_ctx tun_ctx = {};

	if (nft_payload_inner_offset(pkt) < 0)
		goto err;

	/* Reparse only on a cache miss; nft_inner_parse() casts away const
	 * because it sets NFT_PKTINFO_INNER_FULL on the pktinfo.
	 */
	if (nft_inner_parse_needed(priv, pkt, &tun_ctx) &&
	    nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0)
		goto err;

	switch (priv->expr_type) {
	case NFT_INNER_EXPR_PAYLOAD:
		nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
		break;
	case NFT_INNER_EXPR_META:
		nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
		break;
	default:
		WARN_ON_ONCE(1);
		goto err;
	}
	/* Make the parsed context available to subsequent inner expressions
	 * evaluating the same skb.
	 */
	nft_inner_save_tun_ctx(pkt, &tun_ctx);

	return;
err:
	regs->verdict.code = NFT_BREAK;
}
309 
/* Netlink attribute policy for the "inner" expression. */
static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = {
	[NFTA_INNER_NUM]	= { .type = NLA_U32 },
	[NFTA_INNER_FLAGS]	= { .type = NLA_U32 },
	[NFTA_INNER_HDRSIZE]	= { .type = NLA_U32 },
	[NFTA_INNER_TYPE]	= { .type = NLA_U32 },
	[NFTA_INNER_EXPR]	= { .type = NLA_NESTED },
};
317 
/* Holds the ops and parsed attribute table filled in by
 * nft_expr_inner_parse() for the nested NFTA_INNER_EXPR expression.
 */
struct nft_expr_info {
	const struct nft_expr_ops	*ops;
	const struct nlattr		*attr;
	struct nlattr			*tb[NFT_EXPR_MAXATTR + 1];
};
323 
/* Parse netlink attributes into the nft_inner private area and initialize
 * the embedded payload/meta expression.  Returns 0 on success, -EINVAL on
 * missing attributes, an out-of-range type or an unsupported inner
 * expression, -EOPNOTSUPP on unknown flags, a non-zero header number or a
 * bad header size, or the error from the nested expression's own init.
 */
static int nft_inner_init(const struct nft_ctx *ctx,
			  const struct nft_expr *expr,
			  const struct nlattr * const tb[])
{
	struct nft_inner *priv = nft_expr_priv(expr);
	u32 flags, hdrsize, type, num;
	struct nft_expr_info expr_info;
	int err;

	/* All five attributes are mandatory. */
	if (!tb[NFTA_INNER_FLAGS] ||
	    !tb[NFTA_INNER_NUM] ||
	    !tb[NFTA_INNER_HDRSIZE] ||
	    !tb[NFTA_INNER_TYPE] ||
	    !tb[NFTA_INNER_EXPR])
		return -EINVAL;

	flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS]));
	if (flags & ~NFT_INNER_MASK)
		return -EOPNOTSUPP;

	/* Only the first inner header (num == 0) is supported. */
	num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM]));
	if (num != 0)
		return -EOPNOTSUPP;

	hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE]));
	type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE]));

	/* priv->type is a u8. */
	if (type > U8_MAX)
		return -EINVAL;

	if (flags & NFT_INNER_HDRSIZE) {
		if (hdrsize == 0 || hdrsize > 64)
			return -EOPNOTSUPP;
	}

	priv->flags = flags;
	priv->hdrsize = hdrsize;
	priv->type = type;

	err = nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info);
	if (err < 0)
		return err;

	priv->expr.ops = expr_info.ops;

	/* Only payload and meta may be embedded as inner expressions. */
	if (!strcmp(expr_info.ops->type->name, "payload"))
		priv->expr_type = NFT_INNER_EXPR_PAYLOAD;
	else if (!strcmp(expr_info.ops->type->name, "meta"))
		priv->expr_type = NFT_INNER_EXPR_META;
	else
		return -EINVAL;

	err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr,
				  (const struct nlattr * const*)expr_info.tb);
	if (err < 0)
		return err;

	return 0;
}
383 
/* Dump the inner expression configuration to a netlink message, including
 * the nested payload/meta expression.  Returns 0 on success, -1 when the
 * attributes do not fit into @skb.
 */
static int nft_inner_dump(struct sk_buff *skb,
			  const struct nft_expr *expr, bool reset)
{
	const struct nft_inner *priv = nft_expr_priv(expr);

	/* Header number is always 0, see nft_inner_init(). */
	if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize)))
		goto nla_put_failure;

	if (nft_expr_dump(skb, NFTA_INNER_EXPR,
			  (struct nft_expr *)&priv->expr, reset) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -1;
}
404 
/* Expression ops for "inner": eval/init/dump only, no destroy callback. */
static const struct nft_expr_ops nft_inner_ops = {
	.type		= &nft_inner_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_inner)),
	.eval		= nft_inner_eval,
	.init		= nft_inner_init,
	.dump		= nft_inner_dump,
};
412 
/* Expression type registration for the "inner" expression. */
struct nft_expr_type nft_inner_type __read_mostly = {
	.name		= "inner",
	.ops		= &nft_inner_ops,
	.policy		= nft_inner_policy,
	.maxattr	= NFTA_INNER_MAX,
	.owner		= THIS_MODULE,
};
420