xref: /openbmc/linux/net/ipv6/ioam6_iptunnel.c (revision 0a907292)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  *  IPv6 IOAM Lightweight Tunnel implementation
4  *
5  *  Author:
6  *  Justin Iurman <justin.iurman@uliege.be>
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/skbuff.h>
11 #include <linux/net.h>
12 #include <linux/in6.h>
13 #include <linux/ioam6.h>
14 #include <linux/ioam6_iptunnel.h>
15 #include <net/dst.h>
16 #include <net/sock.h>
17 #include <net/lwtunnel.h>
18 #include <net/ioam6.h>
19 #include <net/netlink.h>
20 #include <net/ipv6.h>
21 #include <net/dst_cache.h>
22 #include <net/ip6_route.h>
23 #include <net/addrconf.h>
24 
/* Masks applied to the host-order trace type (be32_to_cpu(type_be32)) when
 * ioam6_validate_trace_hdr() computes the per-node data length (nodelen,
 * counted in 4-octet units):
 *  - every bit set under IOAM6_MASK_SHORT_FIELDS adds sizeof(__be32) / 4 = 1;
 *  - every bit set under IOAM6_MASK_WIDE_FIELDS adds sizeof(__be64) / 4 = 2.
 *
 * NOTE(review): with the trace-type bits numbered from the most significant
 * bit, these correspond to bits 0-7 and 11 (short) and bits 8-10 (wide) --
 * confirm against the IOAM trace-type definition.
 */
#define IOAM6_MASK_SHORT_FIELDS 0xff100000
#define IOAM6_MASK_WIDE_FIELDS 0xe00000
27 
/* Template of the Hop-by-Hop extension header inserted by this tunnel.
 *
 * The structure is packed because it is copied verbatim into the packet
 * (see ioam6_do_inline() / ioam6_do_encap()). The trace data area is not a
 * member: ioam6_build_state() allocates it right after this structure.
 */
struct ioam6_lwt_encap {
	struct ipv6_hopopt_hdr eh;	/* Hop-by-Hop header (nexthdr, hdrlen) */
	u8 pad[2];			/* 2-octet padding for 4n-alignment */
	struct ioam6_hdr ioamh;		/* IOAM option header */
	struct ioam6_trace_hdr traceh;	/* pre-allocated trace header */
} __packed;
34 
/* Private state of an IOAM6 lightweight tunnel, stored in
 * lwtunnel_state->data (see ioam6_lwt_state()).
 */
struct ioam6_lwt {
	/* route cache used by ioam6_output() when the destination changed */
	struct dst_cache cache;
	/* insertion mode, one of IOAM6_IPTUNNEL_MODE_* */
	u8 mode;
	/* outer destination; only written when IOAM6_IPTUNNEL_DST is given */
	struct in6_addr tundst;
	/* header template; ioam6_build_state() allocates the trace data area
	 * immediately after it, so it has to remain the last member
	 */
	struct ioam6_lwt_encap	tuninfo;
};
41 
42 static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
43 {
44 	return (struct ioam6_lwt *)lwt->data;
45 }
46 
47 static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
48 {
49 	return &ioam6_lwt_state(lwt)->tuninfo;
50 }
51 
52 static struct ioam6_trace_hdr *ioam6_lwt_trace(struct lwtunnel_state *lwt)
53 {
54 	return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
55 }
56 
/* Netlink policy for the nested IOAM6 encap attributes parsed by
 * ioam6_build_state():
 *  - MODE:  u8 restricted to [IOAM6_IPTUNNEL_MODE_MIN, IOAM6_IPTUNNEL_MODE_MAX]
 *  - DST:   payload of exactly sizeof(struct in6_addr) octets
 *  - TRACE: payload of exactly sizeof(struct ioam6_trace_hdr) octets
 */
static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
	[IOAM6_IPTUNNEL_MODE]	= NLA_POLICY_RANGE(NLA_U8,
						   IOAM6_IPTUNNEL_MODE_MIN,
						   IOAM6_IPTUNNEL_MODE_MAX),
	[IOAM6_IPTUNNEL_DST]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[IOAM6_IPTUNNEL_TRACE]	= NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
};
64 
65 static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
66 {
67 	u32 fields;
68 
69 	if (!trace->type_be32 || !trace->remlen ||
70 	    trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 ||
71 	    trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
72 	    trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
73 	    trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
74 	    trace->type.bit21)
75 		return false;
76 
77 	trace->nodelen = 0;
78 	fields = be32_to_cpu(trace->type_be32);
79 
80 	trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
81 				* (sizeof(__be32) / 4);
82 	trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
83 				* (sizeof(__be64) / 4);
84 
85 	return true;
86 }
87 
88 static int ioam6_build_state(struct net *net, struct nlattr *nla,
89 			     unsigned int family, const void *cfg,
90 			     struct lwtunnel_state **ts,
91 			     struct netlink_ext_ack *extack)
92 {
93 	struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
94 	struct ioam6_lwt_encap *tuninfo;
95 	struct ioam6_trace_hdr *trace;
96 	struct lwtunnel_state *lwt;
97 	struct ioam6_lwt *ilwt;
98 	int len_aligned, err;
99 	u8 mode;
100 
101 	if (family != AF_INET6)
102 		return -EINVAL;
103 
104 	err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
105 			       ioam6_iptunnel_policy, extack);
106 	if (err < 0)
107 		return err;
108 
109 	if (!tb[IOAM6_IPTUNNEL_MODE])
110 		mode = IOAM6_IPTUNNEL_MODE_INLINE;
111 	else
112 		mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
113 
114 	if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
115 		NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
116 		return -EINVAL;
117 	}
118 
119 	if (!tb[IOAM6_IPTUNNEL_TRACE]) {
120 		NL_SET_ERR_MSG(extack, "missing trace");
121 		return -EINVAL;
122 	}
123 
124 	trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
125 	if (!ioam6_validate_trace_hdr(trace)) {
126 		NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
127 				    "invalid trace validation");
128 		return -EINVAL;
129 	}
130 
131 	len_aligned = ALIGN(trace->remlen * 4, 8);
132 	lwt = lwtunnel_state_alloc(sizeof(*ilwt) + len_aligned);
133 	if (!lwt)
134 		return -ENOMEM;
135 
136 	ilwt = ioam6_lwt_state(lwt);
137 	err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
138 	if (err) {
139 		kfree(lwt);
140 		return err;
141 	}
142 
143 	ilwt->mode = mode;
144 	if (tb[IOAM6_IPTUNNEL_DST])
145 		ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
146 
147 	tuninfo = ioam6_lwt_info(lwt);
148 	tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
149 	tuninfo->pad[0] = IPV6_TLV_PADN;
150 	tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
151 	tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
152 	tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
153 					+ trace->remlen * 4;
154 
155 	memcpy(&tuninfo->traceh, trace, sizeof(*trace));
156 
157 	if (len_aligned - trace->remlen * 4) {
158 		tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
159 		tuninfo->traceh.data[trace->remlen * 4 + 1] = 2;
160 	}
161 
162 	lwt->type = LWTUNNEL_ENCAP_IOAM6;
163 	lwt->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
164 
165 	*ts = lwt;
166 
167 	return 0;
168 }
169 
170 static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
171 {
172 	struct ioam6_trace_hdr *trace;
173 	struct ioam6_namespace *ns;
174 
175 	trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
176 					   + sizeof(struct ipv6_hopopt_hdr) + 2
177 					   + sizeof(struct ioam6_hdr));
178 
179 	ns = ioam6_namespace(net, trace->namespace_id);
180 	if (ns)
181 		ioam6_fill_trace_data(skb, ns, trace, false);
182 
183 	return 0;
184 }
185 
186 static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
187 			   struct ioam6_lwt_encap *tuninfo)
188 {
189 	struct ipv6hdr *oldhdr, *hdr;
190 	int hdrlen, err;
191 
192 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
193 
194 	err = skb_cow_head(skb, hdrlen + skb->mac_len);
195 	if (unlikely(err))
196 		return err;
197 
198 	oldhdr = ipv6_hdr(skb);
199 	skb_pull(skb, sizeof(*oldhdr));
200 	skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));
201 
202 	skb_push(skb, sizeof(*oldhdr) + hdrlen);
203 	skb_reset_network_header(skb);
204 	skb_mac_header_rebuild(skb);
205 
206 	hdr = ipv6_hdr(skb);
207 	memmove(hdr, oldhdr, sizeof(*oldhdr));
208 	tuninfo->eh.nexthdr = hdr->nexthdr;
209 
210 	skb_set_transport_header(skb, sizeof(*hdr));
211 	skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);
212 
213 	memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
214 
215 	hdr->nexthdr = NEXTHDR_HOP;
216 	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
217 
218 	return ioam6_do_fill(net, skb);
219 }
220 
221 static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
222 			  struct ioam6_lwt_encap *tuninfo,
223 			  struct in6_addr *tundst)
224 {
225 	struct dst_entry *dst = skb_dst(skb);
226 	struct ipv6hdr *hdr, *inner_hdr;
227 	int hdrlen, len, err;
228 
229 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
230 	len = sizeof(*hdr) + hdrlen;
231 
232 	err = skb_cow_head(skb, len + skb->mac_len);
233 	if (unlikely(err))
234 		return err;
235 
236 	inner_hdr = ipv6_hdr(skb);
237 
238 	skb_push(skb, len);
239 	skb_reset_network_header(skb);
240 	skb_mac_header_rebuild(skb);
241 	skb_set_transport_header(skb, sizeof(*hdr));
242 
243 	tuninfo->eh.nexthdr = NEXTHDR_IPV6;
244 	memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
245 
246 	hdr = ipv6_hdr(skb);
247 	memcpy(hdr, inner_hdr, sizeof(*hdr));
248 
249 	hdr->nexthdr = NEXTHDR_HOP;
250 	hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
251 	hdr->daddr = *tundst;
252 	ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
253 			   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
254 
255 	skb_postpush_rcsum(skb, hdr, len);
256 
257 	return ioam6_do_fill(net, skb);
258 }
259 
260 static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
261 {
262 	struct dst_entry *dst = skb_dst(skb);
263 	struct in6_addr orig_daddr;
264 	struct ioam6_lwt *ilwt;
265 	int err = -EINVAL;
266 
267 	if (skb->protocol != htons(ETH_P_IPV6))
268 		goto drop;
269 
270 	ilwt = ioam6_lwt_state(dst->lwtstate);
271 	orig_daddr = ipv6_hdr(skb)->daddr;
272 
273 	switch (ilwt->mode) {
274 	case IOAM6_IPTUNNEL_MODE_INLINE:
275 do_inline:
276 		/* Direct insertion - if there is no Hop-by-Hop yet */
277 		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
278 			goto out;
279 
280 		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
281 		if (unlikely(err))
282 			goto drop;
283 
284 		break;
285 	case IOAM6_IPTUNNEL_MODE_ENCAP:
286 do_encap:
287 		/* Encapsulation (ip6ip6) */
288 		err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
289 		if (unlikely(err))
290 			goto drop;
291 
292 		break;
293 	case IOAM6_IPTUNNEL_MODE_AUTO:
294 		/* Automatic (RFC8200 compliant):
295 		 *  - local packets -> INLINE mode
296 		 *  - in-transit packets -> ENCAP mode
297 		 */
298 		if (!skb->dev)
299 			goto do_inline;
300 
301 		goto do_encap;
302 	default:
303 		goto drop;
304 	}
305 
306 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
307 	if (unlikely(err))
308 		goto drop;
309 
310 	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
311 		preempt_disable();
312 		dst = dst_cache_get(&ilwt->cache);
313 		preempt_enable();
314 
315 		if (unlikely(!dst)) {
316 			struct ipv6hdr *hdr = ipv6_hdr(skb);
317 			struct flowi6 fl6;
318 
319 			memset(&fl6, 0, sizeof(fl6));
320 			fl6.daddr = hdr->daddr;
321 			fl6.saddr = hdr->saddr;
322 			fl6.flowlabel = ip6_flowinfo(hdr);
323 			fl6.flowi6_mark = skb->mark;
324 			fl6.flowi6_proto = hdr->nexthdr;
325 
326 			dst = ip6_route_output(net, NULL, &fl6);
327 			if (dst->error) {
328 				err = dst->error;
329 				dst_release(dst);
330 				goto drop;
331 			}
332 
333 			preempt_disable();
334 			dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
335 			preempt_enable();
336 		}
337 
338 		skb_dst_drop(skb);
339 		skb_dst_set(skb, dst);
340 
341 		return dst_output(net, sk, skb);
342 	}
343 out:
344 	return dst->lwtstate->orig_output(net, sk, skb);
345 drop:
346 	kfree_skb(skb);
347 	return err;
348 }
349 
350 static void ioam6_destroy_state(struct lwtunnel_state *lwt)
351 {
352 	dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
353 }
354 
355 static int ioam6_fill_encap_info(struct sk_buff *skb,
356 				 struct lwtunnel_state *lwtstate)
357 {
358 	struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
359 	int err;
360 
361 	err = nla_put_u8(skb, IOAM6_IPTUNNEL_MODE, ilwt->mode);
362 	if (err)
363 		goto ret;
364 
365 	if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
366 		err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
367 		if (err)
368 			goto ret;
369 	}
370 
371 	err = nla_put(skb, IOAM6_IPTUNNEL_TRACE, sizeof(ilwt->tuninfo.traceh),
372 		      &ilwt->tuninfo.traceh);
373 ret:
374 	return err;
375 }
376 
377 static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
378 {
379 	struct ioam6_lwt *ilwt = ioam6_lwt_state(lwtstate);
380 	int nlsize;
381 
382 	nlsize = nla_total_size(sizeof(ilwt->mode)) +
383 		  nla_total_size(sizeof(ilwt->tuninfo.traceh));
384 
385 	if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
386 		nlsize += nla_total_size(sizeof(ilwt->tundst));
387 
388 	return nlsize;
389 }
390 
391 static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
392 {
393 	struct ioam6_trace_hdr *trace_a = ioam6_lwt_trace(a);
394 	struct ioam6_trace_hdr *trace_b = ioam6_lwt_trace(b);
395 	struct ioam6_lwt *ilwt_a = ioam6_lwt_state(a);
396 	struct ioam6_lwt *ilwt_b = ioam6_lwt_state(b);
397 
398 	return (ilwt_a->mode != ilwt_b->mode ||
399 		(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
400 		 !ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
401 		trace_a->namespace_id != trace_b->namespace_id);
402 }
403 
/* lwtunnel operations registered for LWTUNNEL_ENCAP_IOAM6 (see
 * ioam6_iptunnel_init()). Only an output handler is provided.
 */
static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
	.build_state		= ioam6_build_state,
	.destroy_state		= ioam6_destroy_state,
	.output		= ioam6_output,
	.fill_encap		= ioam6_fill_encap_info,
	.get_encap_size	= ioam6_encap_nlsize,
	.cmp_encap		= ioam6_encap_cmp,
	.owner			= THIS_MODULE,
};
413 
414 int __init ioam6_iptunnel_init(void)
415 {
416 	return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
417 }
418 
419 void ioam6_iptunnel_exit(void)
420 {
421 	lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
422 }
423