xref: /openbmc/linux/net/netfilter/nft_tunnel.c (revision 14474950)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/kernel.h>
3 #include <linux/init.h>
4 #include <linux/module.h>
5 #include <linux/seqlock.h>
6 #include <linux/netlink.h>
7 #include <linux/netfilter.h>
8 #include <linux/netfilter/nf_tables.h>
9 #include <net/netfilter/nf_tables.h>
10 #include <net/dst_metadata.h>
11 #include <net/ip_tunnels.h>
12 #include <net/vxlan.h>
13 #include <net/erspan.h>
14 #include <net/geneve.h>
15 
/* Per-expression state for the "tunnel" expression: which tunnel key is
 * fetched, the destination register it is stored in, and the direction
 * (mode) filter applied against the packet's tunnel metadata.
 */
struct nft_tunnel {
	enum nft_tunnel_keys	key:8;
	enum nft_registers	dreg:8;
	enum nft_tunnel_mode	mode:8;
};
21 
22 static void nft_tunnel_get_eval(const struct nft_expr *expr,
23 				struct nft_regs *regs,
24 				const struct nft_pktinfo *pkt)
25 {
26 	const struct nft_tunnel *priv = nft_expr_priv(expr);
27 	u32 *dest = &regs->data[priv->dreg];
28 	struct ip_tunnel_info *tun_info;
29 
30 	tun_info = skb_tunnel_info(pkt->skb);
31 
32 	switch (priv->key) {
33 	case NFT_TUNNEL_PATH:
34 		if (!tun_info) {
35 			nft_reg_store8(dest, false);
36 			return;
37 		}
38 		if (priv->mode == NFT_TUNNEL_MODE_NONE ||
39 		    (priv->mode == NFT_TUNNEL_MODE_RX &&
40 		     !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
41 		    (priv->mode == NFT_TUNNEL_MODE_TX &&
42 		     (tun_info->mode & IP_TUNNEL_INFO_TX)))
43 			nft_reg_store8(dest, true);
44 		else
45 			nft_reg_store8(dest, false);
46 		break;
47 	case NFT_TUNNEL_ID:
48 		if (!tun_info) {
49 			regs->verdict.code = NFT_BREAK;
50 			return;
51 		}
52 		if (priv->mode == NFT_TUNNEL_MODE_NONE ||
53 		    (priv->mode == NFT_TUNNEL_MODE_RX &&
54 		     !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
55 		    (priv->mode == NFT_TUNNEL_MODE_TX &&
56 		     (tun_info->mode & IP_TUNNEL_INFO_TX)))
57 			*dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
58 		else
59 			regs->verdict.code = NFT_BREAK;
60 		break;
61 	default:
62 		WARN_ON(1);
63 		regs->verdict.code = NFT_BREAK;
64 	}
65 }
66 
/* Netlink attribute policy for the tunnel expression. */
static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = {
	[NFTA_TUNNEL_KEY]	= { .type = NLA_U32 },
	[NFTA_TUNNEL_DREG]	= { .type = NLA_U32 },
	[NFTA_TUNNEL_MODE]	= { .type = NLA_U32 },
};
72 
73 static int nft_tunnel_get_init(const struct nft_ctx *ctx,
74 			       const struct nft_expr *expr,
75 			       const struct nlattr * const tb[])
76 {
77 	struct nft_tunnel *priv = nft_expr_priv(expr);
78 	u32 len;
79 
80 	if (!tb[NFTA_TUNNEL_KEY] ||
81 	    !tb[NFTA_TUNNEL_DREG])
82 		return -EINVAL;
83 
84 	priv->key = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY]));
85 	switch (priv->key) {
86 	case NFT_TUNNEL_PATH:
87 		len = sizeof(u8);
88 		break;
89 	case NFT_TUNNEL_ID:
90 		len = sizeof(u32);
91 		break;
92 	default:
93 		return -EOPNOTSUPP;
94 	}
95 
96 	priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
97 
98 	if (tb[NFTA_TUNNEL_MODE]) {
99 		priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE]));
100 		if (priv->mode > NFT_TUNNEL_MODE_MAX)
101 			return -EOPNOTSUPP;
102 	} else {
103 		priv->mode = NFT_TUNNEL_MODE_NONE;
104 	}
105 
106 	return nft_validate_register_store(ctx, priv->dreg, NULL,
107 					   NFT_DATA_VALUE, len);
108 }
109 
110 static int nft_tunnel_get_dump(struct sk_buff *skb,
111 			       const struct nft_expr *expr)
112 {
113 	const struct nft_tunnel *priv = nft_expr_priv(expr);
114 
115 	if (nla_put_be32(skb, NFTA_TUNNEL_KEY, htonl(priv->key)))
116 		goto nla_put_failure;
117 	if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg))
118 		goto nla_put_failure;
119 	if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode)))
120 		goto nla_put_failure;
121 	return 0;
122 
123 nla_put_failure:
124 	return -1;
125 }
126 
static struct nft_expr_type nft_tunnel_type;
/* Operations for the "tunnel" expression. */
static const struct nft_expr_ops nft_tunnel_get_ops = {
	.type		= &nft_tunnel_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_tunnel)),
	.eval		= nft_tunnel_get_eval,
	.init		= nft_tunnel_get_init,
	.dump		= nft_tunnel_get_dump,
};
135 
/* Registration record for the "tunnel" expression type. */
static struct nft_expr_type nft_tunnel_type __read_mostly = {
	.name		= "tunnel",
	.ops		= &nft_tunnel_get_ops,
	.policy		= nft_tunnel_policy,
	.maxattr	= NFTA_TUNNEL_MAX,
	.owner		= THIS_MODULE,
};
143 
/* Encapsulation options carried by a tunnel object. Only one option type
 * is active at a time: @flags (a TUNNEL_*_OPT bit) selects which union
 * member is valid and @len is the number of bytes used in @u.
 */
struct nft_tunnel_opts {
	union {
		struct vxlan_metadata	vxlan;
		struct erspan_metadata	erspan;
		u8	data[IP_TUNNEL_OPTS_MAX];
	} u;
	u32	len;
	__be16	flags;
};
153 
/* A tunnel object: the preallocated metadata dst installed on packets at
 * eval time, plus a copy of the encapsulation options used to build and
 * dump it.
 */
struct nft_tunnel_obj {
	struct metadata_dst	*md;
	struct nft_tunnel_opts	opts;
};
158 
/* Policy for the nested IPv4 endpoint attributes of a tunnel object. */
static const struct nla_policy nft_tunnel_ip_policy[NFTA_TUNNEL_KEY_IP_MAX + 1] = {
	[NFTA_TUNNEL_KEY_IP_SRC]	= { .type = NLA_U32 },
	[NFTA_TUNNEL_KEY_IP_DST]	= { .type = NLA_U32 },
};
163 
164 static int nft_tunnel_obj_ip_init(const struct nft_ctx *ctx,
165 				  const struct nlattr *attr,
166 				  struct ip_tunnel_info *info)
167 {
168 	struct nlattr *tb[NFTA_TUNNEL_KEY_IP_MAX + 1];
169 	int err;
170 
171 	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP_MAX, attr,
172 					  nft_tunnel_ip_policy, NULL);
173 	if (err < 0)
174 		return err;
175 
176 	if (!tb[NFTA_TUNNEL_KEY_IP_DST])
177 		return -EINVAL;
178 
179 	if (tb[NFTA_TUNNEL_KEY_IP_SRC])
180 		info->key.u.ipv4.src = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP_SRC]);
181 	if (tb[NFTA_TUNNEL_KEY_IP_DST])
182 		info->key.u.ipv4.dst = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP_DST]);
183 
184 	return 0;
185 }
186 
/* Policy for the nested IPv6 endpoint attributes of a tunnel object. */
static const struct nla_policy nft_tunnel_ip6_policy[NFTA_TUNNEL_KEY_IP6_MAX + 1] = {
	[NFTA_TUNNEL_KEY_IP6_SRC]	= { .len = sizeof(struct in6_addr), },
	[NFTA_TUNNEL_KEY_IP6_DST]	= { .len = sizeof(struct in6_addr), },
	[NFTA_TUNNEL_KEY_IP6_FLOWLABEL]	= { .type = NLA_U32, }
};
192 
193 static int nft_tunnel_obj_ip6_init(const struct nft_ctx *ctx,
194 				   const struct nlattr *attr,
195 				   struct ip_tunnel_info *info)
196 {
197 	struct nlattr *tb[NFTA_TUNNEL_KEY_IP6_MAX + 1];
198 	int err;
199 
200 	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr,
201 					  nft_tunnel_ip6_policy, NULL);
202 	if (err < 0)
203 		return err;
204 
205 	if (!tb[NFTA_TUNNEL_KEY_IP6_DST])
206 		return -EINVAL;
207 
208 	if (tb[NFTA_TUNNEL_KEY_IP6_SRC]) {
209 		memcpy(&info->key.u.ipv6.src,
210 		       nla_data(tb[NFTA_TUNNEL_KEY_IP6_SRC]),
211 		       sizeof(struct in6_addr));
212 	}
213 	if (tb[NFTA_TUNNEL_KEY_IP6_DST]) {
214 		memcpy(&info->key.u.ipv6.dst,
215 		       nla_data(tb[NFTA_TUNNEL_KEY_IP6_DST]),
216 		       sizeof(struct in6_addr));
217 	}
218 	if (tb[NFTA_TUNNEL_KEY_IP6_FLOWLABEL])
219 		info->key.label = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP6_FLOWLABEL]);
220 
221 	info->mode |= IP_TUNNEL_INFO_IPV6;
222 
223 	return 0;
224 }
225 
/* Policy for the nested vxlan option attributes. */
static const struct nla_policy nft_tunnel_opts_vxlan_policy[NFTA_TUNNEL_KEY_VXLAN_MAX + 1] = {
	[NFTA_TUNNEL_KEY_VXLAN_GBP]	= { .type = NLA_U32 },
};
229 
230 static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr,
231 				     struct nft_tunnel_opts *opts)
232 {
233 	struct nlattr *tb[NFTA_TUNNEL_KEY_VXLAN_MAX + 1];
234 	int err;
235 
236 	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr,
237 					  nft_tunnel_opts_vxlan_policy, NULL);
238 	if (err < 0)
239 		return err;
240 
241 	if (!tb[NFTA_TUNNEL_KEY_VXLAN_GBP])
242 		return -EINVAL;
243 
244 	opts->u.vxlan.gbp = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_VXLAN_GBP]));
245 
246 	opts->len	= sizeof(struct vxlan_metadata);
247 	opts->flags	= TUNNEL_VXLAN_OPT;
248 
249 	return 0;
250 }
251 
/* Policy for the nested erspan option attributes (v1 and v2). */
static const struct nla_policy nft_tunnel_opts_erspan_policy[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1] = {
	[NFTA_TUNNEL_KEY_ERSPAN_VERSION]	= { .type = NLA_U32 },
	[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX]	= { .type = NLA_U32 },
	[NFTA_TUNNEL_KEY_ERSPAN_V2_DIR]		= { .type = NLA_U8 },
	[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID]	= { .type = NLA_U8 },
};
258 
259 static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
260 				      struct nft_tunnel_opts *opts)
261 {
262 	struct nlattr *tb[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1];
263 	uint8_t hwid, dir;
264 	int err, version;
265 
266 	err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX,
267 					  attr, nft_tunnel_opts_erspan_policy,
268 					  NULL);
269 	if (err < 0)
270 		return err;
271 
272 	if (!tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION])
273 		 return -EINVAL;
274 
275 	version = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION]));
276 	switch (version) {
277 	case ERSPAN_VERSION:
278 		if (!tb[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX])
279 			return -EINVAL;
280 
281 		opts->u.erspan.u.index =
282 			nla_get_be32(tb[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX]);
283 		break;
284 	case ERSPAN_VERSION2:
285 		if (!tb[NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] ||
286 		    !tb[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID])
287 			return -EINVAL;
288 
289 		hwid = nla_get_u8(tb[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID]);
290 		dir = nla_get_u8(tb[NFTA_TUNNEL_KEY_ERSPAN_V2_DIR]);
291 
292 		set_hwid(&opts->u.erspan.u.md2, hwid);
293 		opts->u.erspan.u.md2.dir = dir;
294 		break;
295 	default:
296 		return -EOPNOTSUPP;
297 	}
298 	opts->u.erspan.version = version;
299 
300 	opts->len	= sizeof(struct erspan_metadata);
301 	opts->flags	= TUNNEL_ERSPAN_OPT;
302 
303 	return 0;
304 }
305 
/* Policy for the nested geneve option attributes; DATA is a TLV payload
 * capped at 128 bytes.
 */
static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = {
	[NFTA_TUNNEL_KEY_GENEVE_CLASS]	= { .type = NLA_U16 },
	[NFTA_TUNNEL_KEY_GENEVE_TYPE]	= { .type = NLA_U8 },
	[NFTA_TUNNEL_KEY_GENEVE_DATA]	= { .type = NLA_BINARY, .len = 128 },
};
311 
312 static int nft_tunnel_obj_geneve_init(const struct nlattr *attr,
313 				      struct nft_tunnel_opts *opts)
314 {
315 	struct geneve_opt *opt = (struct geneve_opt *)opts->u.data + opts->len;
316 	struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1];
317 	int err, data_len;
318 
319 	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_GENEVE_MAX, attr,
320 			       nft_tunnel_opts_geneve_policy, NULL);
321 	if (err < 0)
322 		return err;
323 
324 	if (!tb[NFTA_TUNNEL_KEY_GENEVE_CLASS] ||
325 	    !tb[NFTA_TUNNEL_KEY_GENEVE_TYPE] ||
326 	    !tb[NFTA_TUNNEL_KEY_GENEVE_DATA])
327 		return -EINVAL;
328 
329 	attr = tb[NFTA_TUNNEL_KEY_GENEVE_DATA];
330 	data_len = nla_len(attr);
331 	if (data_len % 4)
332 		return -EINVAL;
333 
334 	opts->len += sizeof(*opt) + data_len;
335 	if (opts->len > IP_TUNNEL_OPTS_MAX)
336 		return -EINVAL;
337 
338 	memcpy(opt->opt_data, nla_data(attr), data_len);
339 	opt->length = data_len / 4;
340 	opt->opt_class = nla_get_be16(tb[NFTA_TUNNEL_KEY_GENEVE_CLASS]);
341 	opt->type = nla_get_u8(tb[NFTA_TUNNEL_KEY_GENEVE_TYPE]);
342 	opts->flags = TUNNEL_GENEVE_OPT;
343 
344 	return 0;
345 }
346 
/* Policy for the outer NFTA_TUNNEL_KEY_OPTS container; strict validation
 * starts at the GENEVE attribute.
 */
static const struct nla_policy nft_tunnel_opts_policy[NFTA_TUNNEL_KEY_OPTS_MAX + 1] = {
	[NFTA_TUNNEL_KEY_OPTS_UNSPEC]	= {
		.strict_start_type = NFTA_TUNNEL_KEY_OPTS_GENEVE },
	[NFTA_TUNNEL_KEY_OPTS_VXLAN]	= { .type = NLA_NESTED, },
	[NFTA_TUNNEL_KEY_OPTS_ERSPAN]	= { .type = NLA_NESTED, },
	[NFTA_TUNNEL_KEY_OPTS_GENEVE]	= { .type = NLA_NESTED, },
};
354 
355 static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
356 				    const struct nlattr *attr,
357 				    struct ip_tunnel_info *info,
358 				    struct nft_tunnel_opts *opts)
359 {
360 	int err, rem, type = 0;
361 	struct nlattr *nla;
362 
363 	err = nla_validate_nested_deprecated(attr, NFTA_TUNNEL_KEY_OPTS_MAX,
364 					     nft_tunnel_opts_policy, NULL);
365 	if (err < 0)
366 		return err;
367 
368 	nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) {
369 		switch (nla_type(nla)) {
370 		case NFTA_TUNNEL_KEY_OPTS_VXLAN:
371 			if (type)
372 				return -EINVAL;
373 			err = nft_tunnel_obj_vxlan_init(nla, opts);
374 			if (err)
375 				return err;
376 			type = TUNNEL_VXLAN_OPT;
377 			break;
378 		case NFTA_TUNNEL_KEY_OPTS_ERSPAN:
379 			if (type)
380 				return -EINVAL;
381 			err = nft_tunnel_obj_erspan_init(nla, opts);
382 			if (err)
383 				return err;
384 			type = TUNNEL_ERSPAN_OPT;
385 			break;
386 		case NFTA_TUNNEL_KEY_OPTS_GENEVE:
387 			if (type && type != TUNNEL_GENEVE_OPT)
388 				return -EINVAL;
389 			err = nft_tunnel_obj_geneve_init(nla, opts);
390 			if (err)
391 				return err;
392 			type = TUNNEL_GENEVE_OPT;
393 			break;
394 		default:
395 			return -EOPNOTSUPP;
396 		}
397 	}
398 
399 	return err;
400 }
401 
/* Top-level policy for tunnel object attributes. */
static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] = {
	[NFTA_TUNNEL_KEY_IP]	= { .type = NLA_NESTED, },
	[NFTA_TUNNEL_KEY_IP6]	= { .type = NLA_NESTED, },
	[NFTA_TUNNEL_KEY_ID]	= { .type = NLA_U32, },
	[NFTA_TUNNEL_KEY_FLAGS]	= { .type = NLA_U32, },
	[NFTA_TUNNEL_KEY_TOS]	= { .type = NLA_U8, },
	[NFTA_TUNNEL_KEY_TTL]	= { .type = NLA_U8, },
	[NFTA_TUNNEL_KEY_SPORT]	= { .type = NLA_U16, },
	[NFTA_TUNNEL_KEY_DPORT]	= { .type = NLA_U16, },
	[NFTA_TUNNEL_KEY_OPTS]	= { .type = NLA_NESTED, },
};
413 
/* Create a tunnel object: parse the netlink attributes into an
 * ip_tunnel_info, allocate a metadata dst and attach the encapsulation
 * options to it. The tunnel id and exactly one address family are
 * mandatory. Returns 0 or a negative errno.
 */
static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
			       const struct nlattr * const tb[],
			       struct nft_object *obj)
{
	struct nft_tunnel_obj *priv = nft_obj_data(obj);
	struct ip_tunnel_info info;
	struct metadata_dst *md;
	int err;

	if (!tb[NFTA_TUNNEL_KEY_ID])
		return -EINVAL;

	memset(&info, 0, sizeof(info));
	info.mode		= IP_TUNNEL_INFO_TX;	/* objects describe the TX path */
	info.key.tun_id		= key32_to_tunnel_id(nla_get_be32(tb[NFTA_TUNNEL_KEY_ID]));
	info.key.tun_flags	= TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;

	/* Exactly one of the IPv4/IPv6 endpoint attributes must be given. */
	if (tb[NFTA_TUNNEL_KEY_IP]) {
		err = nft_tunnel_obj_ip_init(ctx, tb[NFTA_TUNNEL_KEY_IP], &info);
		if (err < 0)
			return err;
	} else if (tb[NFTA_TUNNEL_KEY_IP6]) {
		err = nft_tunnel_obj_ip6_init(ctx, tb[NFTA_TUNNEL_KEY_IP6], &info);
		if (err < 0)
			return err;
	} else {
		return -EINVAL;
	}

	if (tb[NFTA_TUNNEL_KEY_SPORT]) {
		info.key.tp_src = nla_get_be16(tb[NFTA_TUNNEL_KEY_SPORT]);
	}
	if (tb[NFTA_TUNNEL_KEY_DPORT]) {
		info.key.tp_dst = nla_get_be16(tb[NFTA_TUNNEL_KEY_DPORT]);
	}

	if (tb[NFTA_TUNNEL_KEY_FLAGS]) {
		u32 tun_flags;

		tun_flags = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_FLAGS]));
		if (tun_flags & ~NFT_TUNNEL_F_MASK)
			return -EOPNOTSUPP;

		/* Checksumming defaults to on; this flag disables it. */
		if (tun_flags & NFT_TUNNEL_F_ZERO_CSUM_TX)
			info.key.tun_flags &= ~TUNNEL_CSUM;
		if (tun_flags & NFT_TUNNEL_F_DONT_FRAGMENT)
			info.key.tun_flags |= TUNNEL_DONT_FRAGMENT;
		if (tun_flags & NFT_TUNNEL_F_SEQ_NUMBER)
			info.key.tun_flags |= TUNNEL_SEQ;
	}
	if (tb[NFTA_TUNNEL_KEY_TOS])
		info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
	if (tb[NFTA_TUNNEL_KEY_TTL])
		info.key.ttl = nla_get_u8(tb[NFTA_TUNNEL_KEY_TTL]);
	else
		info.key.ttl = U8_MAX;	/* default: maximum TTL */

	/* Options must be parsed first so priv->opts.len is known below. */
	if (tb[NFTA_TUNNEL_KEY_OPTS]) {
		err = nft_tunnel_obj_opts_init(ctx, tb[NFTA_TUNNEL_KEY_OPTS],
					       &info, &priv->opts);
		if (err < 0)
			return err;
	}

	md = metadata_dst_alloc(priv->opts.len, METADATA_IP_TUNNEL, GFP_KERNEL);
	if (!md)
		return -ENOMEM;

	memcpy(&md->u.tun_info, &info, sizeof(info));
#ifdef CONFIG_DST_CACHE
	err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL);
	if (err < 0) {
		metadata_dst_free(md);
		return err;
	}
#endif
	ip_tunnel_info_opts_set(&md->u.tun_info, &priv->opts.u, priv->opts.len,
				priv->opts.flags);
	priv->md = md;

	return 0;
}
496 
497 static inline void nft_tunnel_obj_eval(struct nft_object *obj,
498 				       struct nft_regs *regs,
499 				       const struct nft_pktinfo *pkt)
500 {
501 	struct nft_tunnel_obj *priv = nft_obj_data(obj);
502 	struct sk_buff *skb = pkt->skb;
503 
504 	skb_dst_drop(skb);
505 	dst_hold((struct dst_entry *) priv->md);
506 	skb_dst_set(skb, (struct dst_entry *) priv->md);
507 }
508 
509 static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info)
510 {
511 	struct nlattr *nest;
512 
513 	if (info->mode & IP_TUNNEL_INFO_IPV6) {
514 		nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP6);
515 		if (!nest)
516 			return -1;
517 
518 		if (nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_SRC,
519 				     &info->key.u.ipv6.src) < 0 ||
520 		    nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_DST,
521 				     &info->key.u.ipv6.dst) < 0 ||
522 		    nla_put_be32(skb, NFTA_TUNNEL_KEY_IP6_FLOWLABEL,
523 				 info->key.label)) {
524 			nla_nest_cancel(skb, nest);
525 			return -1;
526 		}
527 
528 		nla_nest_end(skb, nest);
529 	} else {
530 		nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP);
531 		if (!nest)
532 			return -1;
533 
534 		if (nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_SRC,
535 				    info->key.u.ipv4.src) < 0 ||
536 		    nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_DST,
537 				    info->key.u.ipv4.dst) < 0) {
538 			nla_nest_cancel(skb, nest);
539 			return -1;
540 		}
541 
542 		nla_nest_end(skb, nest);
543 	}
544 
545 	return 0;
546 }
547 
548 static int nft_tunnel_opts_dump(struct sk_buff *skb,
549 				struct nft_tunnel_obj *priv)
550 {
551 	struct nft_tunnel_opts *opts = &priv->opts;
552 	struct nlattr *nest, *inner;
553 
554 	nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS);
555 	if (!nest)
556 		return -1;
557 
558 	if (opts->flags & TUNNEL_VXLAN_OPT) {
559 		inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN);
560 		if (!inner)
561 			goto failure;
562 		if (nla_put_be32(skb, NFTA_TUNNEL_KEY_VXLAN_GBP,
563 				 htonl(opts->u.vxlan.gbp)))
564 			goto inner_failure;
565 		nla_nest_end(skb, inner);
566 	} else if (opts->flags & TUNNEL_ERSPAN_OPT) {
567 		inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN);
568 		if (!inner)
569 			goto failure;
570 		if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_VERSION,
571 				 htonl(opts->u.erspan.version)))
572 			goto inner_failure;
573 		switch (opts->u.erspan.version) {
574 		case ERSPAN_VERSION:
575 			if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX,
576 					 opts->u.erspan.u.index))
577 				goto inner_failure;
578 			break;
579 		case ERSPAN_VERSION2:
580 			if (nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_HWID,
581 				       get_hwid(&opts->u.erspan.u.md2)) ||
582 			    nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_DIR,
583 				       opts->u.erspan.u.md2.dir))
584 				goto inner_failure;
585 			break;
586 		}
587 		nla_nest_end(skb, inner);
588 	} else if (opts->flags & TUNNEL_GENEVE_OPT) {
589 		struct geneve_opt *opt;
590 		int offset = 0;
591 
592 		inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
593 		if (!inner)
594 			goto failure;
595 		while (opts->len > offset) {
596 			opt = (struct geneve_opt *)opts->u.data + offset;
597 			if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS,
598 					 opt->opt_class) ||
599 			    nla_put_u8(skb, NFTA_TUNNEL_KEY_GENEVE_TYPE,
600 				       opt->type) ||
601 			    nla_put(skb, NFTA_TUNNEL_KEY_GENEVE_DATA,
602 				    opt->length * 4, opt->opt_data))
603 				goto inner_failure;
604 			offset += sizeof(*opt) + opt->length * 4;
605 		}
606 		nla_nest_end(skb, inner);
607 	}
608 	nla_nest_end(skb, nest);
609 	return 0;
610 
611 inner_failure:
612 	nla_nest_cancel(skb, inner);
613 failure:
614 	nla_nest_cancel(skb, nest);
615 	return -1;
616 }
617 
618 static int nft_tunnel_ports_dump(struct sk_buff *skb,
619 				 struct ip_tunnel_info *info)
620 {
621 	if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, info->key.tp_src) < 0 ||
622 	    nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, info->key.tp_dst) < 0)
623 		return -1;
624 
625 	return 0;
626 }
627 
628 static int nft_tunnel_flags_dump(struct sk_buff *skb,
629 				 struct ip_tunnel_info *info)
630 {
631 	u32 flags = 0;
632 
633 	if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
634 		flags |= NFT_TUNNEL_F_DONT_FRAGMENT;
635 	if (!(info->key.tun_flags & TUNNEL_CSUM))
636 		flags |= NFT_TUNNEL_F_ZERO_CSUM_TX;
637 	if (info->key.tun_flags & TUNNEL_SEQ)
638 		flags |= NFT_TUNNEL_F_SEQ_NUMBER;
639 
640 	if (nla_put_be32(skb, NFTA_TUNNEL_KEY_FLAGS, htonl(flags)) < 0)
641 		return -1;
642 
643 	return 0;
644 }
645 
646 static int nft_tunnel_obj_dump(struct sk_buff *skb,
647 			       struct nft_object *obj, bool reset)
648 {
649 	struct nft_tunnel_obj *priv = nft_obj_data(obj);
650 	struct ip_tunnel_info *info = &priv->md->u.tun_info;
651 
652 	if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ID,
653 			 tunnel_id_to_key32(info->key.tun_id)) ||
654 	    nft_tunnel_ip_dump(skb, info) < 0 ||
655 	    nft_tunnel_ports_dump(skb, info) < 0 ||
656 	    nft_tunnel_flags_dump(skb, info) < 0 ||
657 	    nla_put_u8(skb, NFTA_TUNNEL_KEY_TOS, info->key.tos) ||
658 	    nla_put_u8(skb, NFTA_TUNNEL_KEY_TTL, info->key.ttl) ||
659 	    nft_tunnel_opts_dump(skb, priv) < 0)
660 		goto nla_put_failure;
661 
662 	return 0;
663 
664 nla_put_failure:
665 	return -1;
666 }
667 
/* Release the object's metadata dst (metadata_dst_free also tears down
 * the embedded dst_cache when CONFIG_DST_CACHE is enabled).
 */
static void nft_tunnel_obj_destroy(const struct nft_ctx *ctx,
				   struct nft_object *obj)
{
	struct nft_tunnel_obj *priv = nft_obj_data(obj);

	metadata_dst_free(priv->md);
}
675 
static struct nft_object_type nft_tunnel_obj_type;
/* Operations for tunnel objects. */
static const struct nft_object_ops nft_tunnel_obj_ops = {
	.type		= &nft_tunnel_obj_type,
	.size		= sizeof(struct nft_tunnel_obj),
	.eval		= nft_tunnel_obj_eval,
	.init		= nft_tunnel_obj_init,
	.destroy	= nft_tunnel_obj_destroy,
	.dump		= nft_tunnel_obj_dump,
};
685 
/* Registration record for the NFT_OBJECT_TUNNEL object type. */
static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
	.type		= NFT_OBJECT_TUNNEL,
	.ops		= &nft_tunnel_obj_ops,
	.maxattr	= NFTA_TUNNEL_KEY_MAX,
	.policy		= nft_tunnel_key_policy,
	.owner		= THIS_MODULE,
};
693 
694 static int __init nft_tunnel_module_init(void)
695 {
696 	int err;
697 
698 	err = nft_register_expr(&nft_tunnel_type);
699 	if (err < 0)
700 		return err;
701 
702 	err = nft_register_obj(&nft_tunnel_obj_type);
703 	if (err < 0)
704 		nft_unregister_expr(&nft_tunnel_type);
705 
706 	return err;
707 }
708 
static void __exit nft_tunnel_module_exit(void)
{
	/* Unregister in reverse order of registration. */
	nft_unregister_obj(&nft_tunnel_obj_type);
	nft_unregister_expr(&nft_tunnel_type);
}
714 
module_init(nft_tunnel_module_init);
module_exit(nft_tunnel_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
/* Auto-load this module when the "tunnel" expression or the
 * NFT_OBJECT_TUNNEL object type is requested.
 */
MODULE_ALIAS_NFT_EXPR("tunnel");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_TUNNEL);
MODULE_DESCRIPTION("nftables tunnel expression support");
723