xref: /openbmc/linux/net/ipv6/seg6_iptunnel.c (revision 68198dca)
1 /*
2  *  SR-IPv6 implementation
3  *
4  *  Author:
5  *  David Lebrun <david.lebrun@uclouvain.be>
6  *
7  *
8  *  This program is free software; you can redistribute it and/or
9  *        modify it under the terms of the GNU General Public License
10  *        as published by the Free Software Foundation; either version
11  *        2 of the License, or (at your option) any later version.
12  */
13 
14 #include <linux/types.h>
15 #include <linux/skbuff.h>
16 #include <linux/net.h>
17 #include <linux/module.h>
18 #include <net/ip.h>
19 #include <net/lwtunnel.h>
20 #include <net/netevent.h>
21 #include <net/netns/generic.h>
22 #include <net/ip6_fib.h>
23 #include <net/route.h>
24 #include <net/seg6.h>
25 #include <linux/seg6.h>
26 #include <linux/seg6_iptunnel.h>
27 #include <net/addrconf.h>
28 #include <net/ip6_route.h>
29 #include <net/dst_cache.h>
30 #ifdef CONFIG_IPV6_SEG6_HMAC
31 #include <net/seg6_hmac.h>
32 #endif
33 
34 struct seg6_lwt {
35 	struct dst_cache cache;
36 	struct seg6_iptunnel_encap tuninfo[0];
37 };
38 
39 static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
40 {
41 	return (struct seg6_lwt *)lwt->data;
42 }
43 
44 static inline struct seg6_iptunnel_encap *
45 seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
46 {
47 	return seg6_lwt_lwtunnel(lwt)->tuninfo;
48 }
49 
/* Netlink attribute policy used by seg6_build_state() when parsing the
 * nested encap attributes: SEG6_IPTUNNEL_SRH is an opaque binary blob whose
 * contents are validated separately in seg6_build_state().
 */
static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
	[SEG6_IPTUNNEL_SRH]	= { .type = NLA_BINARY },
};
53 
54 static int nla_put_srh(struct sk_buff *skb, int attrtype,
55 		       struct seg6_iptunnel_encap *tuninfo)
56 {
57 	struct seg6_iptunnel_encap *data;
58 	struct nlattr *nla;
59 	int len;
60 
61 	len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
62 
63 	nla = nla_reserve(skb, attrtype, len);
64 	if (!nla)
65 		return -EMSGSIZE;
66 
67 	data = nla_data(nla);
68 	memcpy(data, tuninfo, len);
69 
70 	return 0;
71 }
72 
73 static void set_tun_src(struct net *net, struct net_device *dev,
74 			struct in6_addr *daddr, struct in6_addr *saddr)
75 {
76 	struct seg6_pernet_data *sdata = seg6_pernet(net);
77 	struct in6_addr *tun_src;
78 
79 	rcu_read_lock();
80 
81 	tun_src = rcu_dereference(sdata->tun_src);
82 
83 	if (!ipv6_addr_any(tun_src)) {
84 		memcpy(saddr, tun_src, sizeof(struct in6_addr));
85 	} else {
86 		ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
87 				   saddr);
88 	}
89 
90 	rcu_read_unlock();
91 }
92 
93 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
94 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
95 {
96 	struct net *net = dev_net(skb_dst(skb)->dev);
97 	struct ipv6hdr *hdr, *inner_hdr;
98 	struct ipv6_sr_hdr *isrh;
99 	int hdrlen, tot_len, err;
100 
101 	hdrlen = (osrh->hdrlen + 1) << 3;
102 	tot_len = hdrlen + sizeof(*hdr);
103 
104 	err = skb_cow_head(skb, tot_len);
105 	if (unlikely(err))
106 		return err;
107 
108 	inner_hdr = ipv6_hdr(skb);
109 
110 	skb_push(skb, tot_len);
111 	skb_reset_network_header(skb);
112 	skb_mac_header_rebuild(skb);
113 	hdr = ipv6_hdr(skb);
114 
115 	/* inherit tc, flowlabel and hlim
116 	 * hlim will be decremented in ip6_forward() afterwards and
117 	 * decapsulation will overwrite inner hlim with outer hlim
118 	 */
119 
120 	if (skb->protocol == htons(ETH_P_IPV6)) {
121 		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
122 			     ip6_flowlabel(inner_hdr));
123 		hdr->hop_limit = inner_hdr->hop_limit;
124 	} else {
125 		ip6_flow_hdr(hdr, 0, 0);
126 		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
127 	}
128 
129 	hdr->nexthdr = NEXTHDR_ROUTING;
130 
131 	isrh = (void *)hdr + sizeof(*hdr);
132 	memcpy(isrh, osrh, hdrlen);
133 
134 	isrh->nexthdr = proto;
135 
136 	hdr->daddr = isrh->segments[isrh->first_segment];
137 	set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
138 
139 #ifdef CONFIG_IPV6_SEG6_HMAC
140 	if (sr_has_hmac(isrh)) {
141 		err = seg6_push_hmac(net, &hdr->saddr, isrh);
142 		if (unlikely(err))
143 			return err;
144 	}
145 #endif
146 
147 	skb_postpush_rcsum(skb, hdr, tot_len);
148 
149 	return 0;
150 }
151 EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
152 
153 /* insert an SRH within an IPv6 packet, just after the IPv6 header */
154 int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
155 {
156 	struct ipv6hdr *hdr, *oldhdr;
157 	struct ipv6_sr_hdr *isrh;
158 	int hdrlen, err;
159 
160 	hdrlen = (osrh->hdrlen + 1) << 3;
161 
162 	err = skb_cow_head(skb, hdrlen);
163 	if (unlikely(err))
164 		return err;
165 
166 	oldhdr = ipv6_hdr(skb);
167 
168 	skb_pull(skb, sizeof(struct ipv6hdr));
169 	skb_postpull_rcsum(skb, skb_network_header(skb),
170 			   sizeof(struct ipv6hdr));
171 
172 	skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
173 	skb_reset_network_header(skb);
174 	skb_mac_header_rebuild(skb);
175 
176 	hdr = ipv6_hdr(skb);
177 
178 	memmove(hdr, oldhdr, sizeof(*hdr));
179 
180 	isrh = (void *)hdr + sizeof(*hdr);
181 	memcpy(isrh, osrh, hdrlen);
182 
183 	isrh->nexthdr = hdr->nexthdr;
184 	hdr->nexthdr = NEXTHDR_ROUTING;
185 
186 	isrh->segments[0] = hdr->daddr;
187 	hdr->daddr = isrh->segments[isrh->first_segment];
188 
189 #ifdef CONFIG_IPV6_SEG6_HMAC
190 	if (sr_has_hmac(isrh)) {
191 		struct net *net = dev_net(skb_dst(skb)->dev);
192 
193 		err = seg6_push_hmac(net, &hdr->saddr, isrh);
194 		if (unlikely(err))
195 			return err;
196 	}
197 #endif
198 
199 	skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
200 
201 	return 0;
202 }
203 EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
204 
/* Apply the SRH described by the lwtunnel state attached to the skb's dst,
 * according to the configured tunnel mode, then fix up payload_len, the
 * transport header offset and the inner protocol.
 *
 * Returns 0 on success or a negative errno; the skb is not freed here, so
 * that is left to the caller.
 */
static int seg6_do_srh(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct seg6_iptunnel_encap *tinfo;
	int proto, err = 0;

	tinfo = seg6_encap_lwtunnel(dst->lwtstate);

	/* record the current headers as inner headers unless the packet is
	 * already marked as encapsulated
	 */
	if (likely(!skb->encapsulation)) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	switch (tinfo->mode) {
	case SEG6_IPTUN_MODE_INLINE:
		/* inline insertion only makes sense for IPv6 packets */
		if (skb->protocol != htons(ETH_P_IPV6))
			return -EINVAL;

		err = seg6_do_srh_inline(skb, tinfo->srh);
		if (err)
			return err;

		skb_reset_inner_headers(skb);
		break;
	case SEG6_IPTUN_MODE_ENCAP:
		/* the SRH nexthdr value depends on the inner L3 protocol */
		if (skb->protocol == htons(ETH_P_IPV6))
			proto = IPPROTO_IPV6;
		else if (skb->protocol == htons(ETH_P_IP))
			proto = IPPROTO_IPIP;
		else
			return -EINVAL;

		err = seg6_do_srh_encap(skb, tinfo->srh, proto);
		if (err)
			return err;

		skb->protocol = htons(ETH_P_IPV6);
		break;
	case SEG6_IPTUN_MODE_L2ENCAP:
		if (!skb_mac_header_was_set(skb))
			return -EINVAL;

		if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
			return -ENOMEM;

		/* extend the data area by mac_len so that the bytes in front
		 * of the network header are covered by the encapsulation
		 */
		skb_mac_header_rebuild(skb);
		skb_push(skb, skb->mac_len);

		err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
		if (err)
			return err;

		skb->protocol = htons(ETH_P_IPV6);
		break;
	}

	/* payload_len covers everything after the fixed IPv6 header, SRH
	 * included
	 */
	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	skb_set_inner_protocol(skb, skb->protocol);

	return 0;
}
268 
269 static int seg6_input(struct sk_buff *skb)
270 {
271 	struct dst_entry *orig_dst = skb_dst(skb);
272 	struct dst_entry *dst = NULL;
273 	struct seg6_lwt *slwt;
274 	int err;
275 
276 	err = seg6_do_srh(skb);
277 	if (unlikely(err)) {
278 		kfree_skb(skb);
279 		return err;
280 	}
281 
282 	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
283 
284 	preempt_disable();
285 	dst = dst_cache_get(&slwt->cache);
286 	preempt_enable();
287 
288 	skb_dst_drop(skb);
289 
290 	if (!dst) {
291 		ip6_route_input(skb);
292 		dst = skb_dst(skb);
293 		if (!dst->error) {
294 			preempt_disable();
295 			dst_cache_set_ip6(&slwt->cache, dst,
296 					  &ipv6_hdr(skb)->saddr);
297 			preempt_enable();
298 		}
299 	} else {
300 		skb_dst_set(skb, dst);
301 	}
302 
303 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
304 	if (unlikely(err))
305 		return err;
306 
307 	return dst_input(skb);
308 }
309 
310 static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
311 {
312 	struct dst_entry *orig_dst = skb_dst(skb);
313 	struct dst_entry *dst = NULL;
314 	struct seg6_lwt *slwt;
315 	int err = -EINVAL;
316 
317 	err = seg6_do_srh(skb);
318 	if (unlikely(err))
319 		goto drop;
320 
321 	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
322 
323 	preempt_disable();
324 	dst = dst_cache_get(&slwt->cache);
325 	preempt_enable();
326 
327 	if (unlikely(!dst)) {
328 		struct ipv6hdr *hdr = ipv6_hdr(skb);
329 		struct flowi6 fl6;
330 
331 		fl6.daddr = hdr->daddr;
332 		fl6.saddr = hdr->saddr;
333 		fl6.flowlabel = ip6_flowinfo(hdr);
334 		fl6.flowi6_mark = skb->mark;
335 		fl6.flowi6_proto = hdr->nexthdr;
336 
337 		dst = ip6_route_output(net, NULL, &fl6);
338 		if (dst->error) {
339 			err = dst->error;
340 			dst_release(dst);
341 			goto drop;
342 		}
343 
344 		preempt_disable();
345 		dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
346 		preempt_enable();
347 	}
348 
349 	skb_dst_drop(skb);
350 	skb_dst_set(skb, dst);
351 
352 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
353 	if (unlikely(err))
354 		goto drop;
355 
356 	return dst_output(net, sk, skb);
357 drop:
358 	kfree_skb(skb);
359 	return err;
360 }
361 
362 static int seg6_build_state(struct nlattr *nla,
363 			    unsigned int family, const void *cfg,
364 			    struct lwtunnel_state **ts,
365 			    struct netlink_ext_ack *extack)
366 {
367 	struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
368 	struct seg6_iptunnel_encap *tuninfo;
369 	struct lwtunnel_state *newts;
370 	int tuninfo_len, min_size;
371 	struct seg6_lwt *slwt;
372 	int err;
373 
374 	if (family != AF_INET && family != AF_INET6)
375 		return -EINVAL;
376 
377 	err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
378 			       seg6_iptunnel_policy, extack);
379 
380 	if (err < 0)
381 		return err;
382 
383 	if (!tb[SEG6_IPTUNNEL_SRH])
384 		return -EINVAL;
385 
386 	tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
387 	tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
388 
389 	/* tuninfo must contain at least the iptunnel encap structure,
390 	 * the SRH and one segment
391 	 */
392 	min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
393 		   sizeof(struct in6_addr);
394 	if (tuninfo_len < min_size)
395 		return -EINVAL;
396 
397 	switch (tuninfo->mode) {
398 	case SEG6_IPTUN_MODE_INLINE:
399 		if (family != AF_INET6)
400 			return -EINVAL;
401 
402 		break;
403 	case SEG6_IPTUN_MODE_ENCAP:
404 		break;
405 	case SEG6_IPTUN_MODE_L2ENCAP:
406 		break;
407 	default:
408 		return -EINVAL;
409 	}
410 
411 	/* verify that SRH is consistent */
412 	if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
413 		return -EINVAL;
414 
415 	newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
416 	if (!newts)
417 		return -ENOMEM;
418 
419 	slwt = seg6_lwt_lwtunnel(newts);
420 
421 	err = dst_cache_init(&slwt->cache, GFP_KERNEL);
422 	if (err) {
423 		kfree(newts);
424 		return err;
425 	}
426 
427 	memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
428 
429 	newts->type = LWTUNNEL_ENCAP_SEG6;
430 	newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
431 
432 	if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
433 		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
434 
435 	newts->headroom = seg6_lwt_headroom(tuninfo);
436 
437 	*ts = newts;
438 
439 	return 0;
440 }
441 
442 static void seg6_destroy_state(struct lwtunnel_state *lwt)
443 {
444 	dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
445 }
446 
447 static int seg6_fill_encap_info(struct sk_buff *skb,
448 				struct lwtunnel_state *lwtstate)
449 {
450 	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
451 
452 	if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
453 		return -EMSGSIZE;
454 
455 	return 0;
456 }
457 
/* Return the netlink attribute size needed to dump the encap description
 * of @lwtstate.
 */
static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	struct seg6_iptunnel_encap *encap = seg6_encap_lwtunnel(lwtstate);
	int payload_len = SEG6_IPTUN_ENCAP_SIZE(encap);

	return nla_total_size(payload_len);
}
464 
/* Compare the encap descriptions of two lwtunnel states.
 *
 * Returns 0 when both have the same size and identical bytes; 1 when the
 * sizes differ; otherwise the non-zero memcmp() result.
 */
static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct seg6_iptunnel_encap *lhs = seg6_encap_lwtunnel(a);
	struct seg6_iptunnel_encap *rhs = seg6_encap_lwtunnel(b);
	int lhs_len = SEG6_IPTUN_ENCAP_SIZE(lhs);
	int rhs_len = SEG6_IPTUN_ENCAP_SIZE(rhs);

	if (lhs_len == rhs_len)
		return memcmp(lhs, rhs, lhs_len);

	return 1;
}
476 
/* lwtunnel callbacks for the seg6 encap type; registered and unregistered
 * under LWTUNNEL_ENCAP_SEG6 by seg6_iptunnel_init()/seg6_iptunnel_exit().
 */
static const struct lwtunnel_encap_ops seg6_iptun_ops = {
	.build_state = seg6_build_state,
	.destroy_state = seg6_destroy_state,
	.output = seg6_output,
	.input = seg6_input,
	.fill_encap = seg6_fill_encap_info,
	.get_encap_size = seg6_encap_nlsize,
	.cmp_encap = seg6_encap_cmp,
	.owner = THIS_MODULE,
};
487 
488 int __init seg6_iptunnel_init(void)
489 {
490 	return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
491 }
492 
/* Unregister the seg6 lwtunnel ops; the return value of the unregistration
 * call is ignored.
 */
void seg6_iptunnel_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}
497