xref: /openbmc/linux/net/ipv6/exthdrs.c (revision 133f9794)
1 /*
2  *	Extension Header handling for IPv6
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *	Andi Kleen		<ak@muc.de>
8  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /* Changes:
17  *	yoshfuji		: ensure not to overrun while parsing
18  *				  tlv options.
19  *	Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
20  *	YOSHIFUJI Hideaki @USAGI  Register inbound extension header
21  *				  handlers as inet6_protocol{}.
22  */
23 
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/netdevice.h>
30 #include <linux/in6.h>
31 #include <linux/icmpv6.h>
32 #include <linux/slab.h>
33 #include <linux/export.h>
34 
35 #include <net/dst.h>
36 #include <net/sock.h>
37 #include <net/snmp.h>
38 
39 #include <net/ipv6.h>
40 #include <net/protocol.h>
41 #include <net/transp_v6.h>
42 #include <net/rawv6.h>
43 #include <net/ndisc.h>
44 #include <net/ip6_route.h>
45 #include <net/addrconf.h>
46 #include <net/calipso.h>
47 #if IS_ENABLED(CONFIG_IPV6_MIP6)
48 #include <net/xfrm.h>
49 #endif
50 #include <linux/seg6.h>
51 #include <net/seg6.h>
52 #ifdef CONFIG_IPV6_SEG6_HMAC
53 #include <net/seg6_hmac.h>
54 #endif
55 
56 #include <linux/uaccess.h>
57 
/*
 *	Parsing tlv encoded headers.
 *
 *	Parsing function "func" returns true if parsing succeeded
 *	and false if it failed.
 *	It MUST NOT touch skb->h.
 */
65 
/*
 * One row of a TLV dispatch table: an option type and the handler to
 * run when that type is met.  The tables in this file end with a row
 * whose type is negative, which is what the lookup loop stops on.
 */
struct tlvtype_proc {
	int	type;					/* option type, < 0 ends the table */
	bool	(*func)(struct sk_buff *skb, int optoff);	/* true on success */
};
70 
71 /*********************
72   Generic functions
73  *********************/
74 
75 /* An unknown option is detected, decide what to do */
76 
77 static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
78 			       bool disallow_unknowns)
79 {
80 	if (disallow_unknowns) {
81 		/* If unknown TLVs are disallowed by configuration
82 		 * then always silently drop packet. Note this also
83 		 * means no ICMP parameter problem is sent which
84 		 * could be a good property to mitigate a reflection DOS
85 		 * attack.
86 		 */
87 
88 		goto drop;
89 	}
90 
91 	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
92 	case 0: /* ignore */
93 		return true;
94 
95 	case 1: /* drop packet */
96 		break;
97 
98 	case 3: /* Send ICMP if not a multicast address and drop packet */
99 		/* Actually, it is redundant check. icmp_send
100 		   will recheck in any case.
101 		 */
102 		if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
103 			break;
104 		/* fall through */
105 	case 2: /* send ICMP PARM PROB regardless and drop packet */
106 		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
107 		return false;
108 	}
109 
110 drop:
111 	kfree_skb(skb);
112 	return false;
113 }
114 
115 /* Parse tlv encoded option header (hop-by-hop or destination) */
116 
117 static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
118 			  struct sk_buff *skb,
119 			  int max_count)
120 {
121 	int len = (skb_transport_header(skb)[1] + 1) << 3;
122 	const unsigned char *nh = skb_network_header(skb);
123 	int off = skb_network_header_len(skb);
124 	const struct tlvtype_proc *curr;
125 	bool disallow_unknowns = false;
126 	int tlv_count = 0;
127 	int padlen = 0;
128 
129 	if (unlikely(max_count < 0)) {
130 		disallow_unknowns = true;
131 		max_count = -max_count;
132 	}
133 
134 	if (skb_transport_offset(skb) + len > skb_headlen(skb))
135 		goto bad;
136 
137 	off += 2;
138 	len -= 2;
139 
140 	while (len > 0) {
141 		int optlen = nh[off + 1] + 2;
142 		int i;
143 
144 		switch (nh[off]) {
145 		case IPV6_TLV_PAD1:
146 			optlen = 1;
147 			padlen++;
148 			if (padlen > 7)
149 				goto bad;
150 			break;
151 
152 		case IPV6_TLV_PADN:
153 			/* RFC 2460 states that the purpose of PadN is
154 			 * to align the containing header to multiples
155 			 * of 8. 7 is therefore the highest valid value.
156 			 * See also RFC 4942, Section 2.1.9.5.
157 			 */
158 			padlen += optlen;
159 			if (padlen > 7)
160 				goto bad;
161 			/* RFC 4942 recommends receiving hosts to
162 			 * actively check PadN payload to contain
163 			 * only zeroes.
164 			 */
165 			for (i = 2; i < optlen; i++) {
166 				if (nh[off + i] != 0)
167 					goto bad;
168 			}
169 			break;
170 
171 		default: /* Other TLV code so scan list */
172 			if (optlen > len)
173 				goto bad;
174 
175 			tlv_count++;
176 			if (tlv_count > max_count)
177 				goto bad;
178 
179 			for (curr = procs; curr->type >= 0; curr++) {
180 				if (curr->type == nh[off]) {
181 					/* type specific length/alignment
182 					   checks will be performed in the
183 					   func(). */
184 					if (curr->func(skb, off) == false)
185 						return false;
186 					break;
187 				}
188 			}
189 			if (curr->type < 0 &&
190 			    !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
191 				return false;
192 
193 			padlen = 0;
194 			break;
195 		}
196 		off += optlen;
197 		len -= optlen;
198 	}
199 
200 	if (len == 0)
201 		return true;
202 bad:
203 	kfree_skb(skb);
204 	return false;
205 }
206 
207 /*****************************
208   Destination options header.
209  *****************************/
210 
211 #if IS_ENABLED(CONFIG_IPV6_MIP6)
212 static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
213 {
214 	struct ipv6_destopt_hao *hao;
215 	struct inet6_skb_parm *opt = IP6CB(skb);
216 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
217 	int ret;
218 
219 	if (opt->dsthao) {
220 		net_dbg_ratelimited("hao duplicated\n");
221 		goto discard;
222 	}
223 	opt->dsthao = opt->dst1;
224 	opt->dst1 = 0;
225 
226 	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
227 
228 	if (hao->length != 16) {
229 		net_dbg_ratelimited("hao invalid option length = %d\n",
230 				    hao->length);
231 		goto discard;
232 	}
233 
234 	if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
235 		net_dbg_ratelimited("hao is not an unicast addr: %pI6\n",
236 				    &hao->addr);
237 		goto discard;
238 	}
239 
240 	ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
241 			       (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
242 	if (unlikely(ret < 0))
243 		goto discard;
244 
245 	if (skb_cloned(skb)) {
246 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
247 			goto discard;
248 
249 		/* update all variable using below by copied skbuff */
250 		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) +
251 						  optoff);
252 		ipv6h = ipv6_hdr(skb);
253 	}
254 
255 	if (skb->ip_summed == CHECKSUM_COMPLETE)
256 		skb->ip_summed = CHECKSUM_NONE;
257 
258 	swap(ipv6h->saddr, hao->addr);
259 
260 	if (skb->tstamp == 0)
261 		__net_timestamp(skb);
262 
263 	return true;
264 
265  discard:
266 	kfree_skb(skb);
267 	return false;
268 }
269 #endif
270 
271 static const struct tlvtype_proc tlvprocdestopt_lst[] = {
272 #if IS_ENABLED(CONFIG_IPV6_MIP6)
273 	{
274 		.type	= IPV6_TLV_HAO,
275 		.func	= ipv6_dest_hao,
276 	},
277 #endif
278 	{-1,			NULL}
279 };
280 
281 static int ipv6_destopt_rcv(struct sk_buff *skb)
282 {
283 	struct inet6_skb_parm *opt = IP6CB(skb);
284 #if IS_ENABLED(CONFIG_IPV6_MIP6)
285 	__u16 dstbuf;
286 #endif
287 	struct dst_entry *dst = skb_dst(skb);
288 	struct net *net = dev_net(skb->dev);
289 	int extlen;
290 
291 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
292 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
293 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
294 		__IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
295 				IPSTATS_MIB_INHDRERRORS);
296 fail_and_free:
297 		kfree_skb(skb);
298 		return -1;
299 	}
300 
301 	extlen = (skb_transport_header(skb)[1] + 1) << 3;
302 	if (extlen > net->ipv6.sysctl.max_dst_opts_len)
303 		goto fail_and_free;
304 
305 	opt->lastopt = opt->dst1 = skb_network_header_len(skb);
306 #if IS_ENABLED(CONFIG_IPV6_MIP6)
307 	dstbuf = opt->dst1;
308 #endif
309 
310 	if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
311 			  init_net.ipv6.sysctl.max_dst_opts_cnt)) {
312 		skb->transport_header += extlen;
313 		opt = IP6CB(skb);
314 #if IS_ENABLED(CONFIG_IPV6_MIP6)
315 		opt->nhoff = dstbuf;
316 #else
317 		opt->nhoff = opt->dst1;
318 #endif
319 		return 1;
320 	}
321 
322 	__IP6_INC_STATS(dev_net(dst->dev),
323 			ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
324 	return -1;
325 }
326 
327 static void seg6_update_csum(struct sk_buff *skb)
328 {
329 	struct ipv6_sr_hdr *hdr;
330 	struct in6_addr *addr;
331 	__be32 from, to;
332 
333 	/* srh is at transport offset and seg_left is already decremented
334 	 * but daddr is not yet updated with next segment
335 	 */
336 
337 	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
338 	addr = hdr->segments + hdr->segments_left;
339 
340 	hdr->segments_left++;
341 	from = *(__be32 *)hdr;
342 
343 	hdr->segments_left--;
344 	to = *(__be32 *)hdr;
345 
346 	/* update skb csum with diff resulting from seg_left decrement */
347 
348 	update_csum_diff4(skb, from, to);
349 
350 	/* compute csum diff between current and next segment and update */
351 
352 	update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr),
353 			   (__be32 *)addr);
354 }
355 
356 static int ipv6_srh_rcv(struct sk_buff *skb)
357 {
358 	struct inet6_skb_parm *opt = IP6CB(skb);
359 	struct net *net = dev_net(skb->dev);
360 	struct ipv6_sr_hdr *hdr;
361 	struct inet6_dev *idev;
362 	struct in6_addr *addr;
363 	int accept_seg6;
364 
365 	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
366 
367 	idev = __in6_dev_get(skb->dev);
368 
369 	accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
370 	if (accept_seg6 > idev->cnf.seg6_enabled)
371 		accept_seg6 = idev->cnf.seg6_enabled;
372 
373 	if (!accept_seg6) {
374 		kfree_skb(skb);
375 		return -1;
376 	}
377 
378 #ifdef CONFIG_IPV6_SEG6_HMAC
379 	if (!seg6_hmac_validate_skb(skb)) {
380 		kfree_skb(skb);
381 		return -1;
382 	}
383 #endif
384 
385 looped_back:
386 	if (hdr->segments_left == 0) {
387 		if (hdr->nexthdr == NEXTHDR_IPV6) {
388 			int offset = (hdr->hdrlen + 1) << 3;
389 
390 			skb_postpull_rcsum(skb, skb_network_header(skb),
391 					   skb_network_header_len(skb));
392 
393 			if (!pskb_pull(skb, offset)) {
394 				kfree_skb(skb);
395 				return -1;
396 			}
397 			skb_postpull_rcsum(skb, skb_transport_header(skb),
398 					   offset);
399 
400 			skb_reset_network_header(skb);
401 			skb_reset_transport_header(skb);
402 			skb->encapsulation = 0;
403 
404 			__skb_tunnel_rx(skb, skb->dev, net);
405 
406 			netif_rx(skb);
407 			return -1;
408 		}
409 
410 		opt->srcrt = skb_network_header_len(skb);
411 		opt->lastopt = opt->srcrt;
412 		skb->transport_header += (hdr->hdrlen + 1) << 3;
413 		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
414 
415 		return 1;
416 	}
417 
418 	if (hdr->segments_left >= (hdr->hdrlen >> 1)) {
419 		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
420 				IPSTATS_MIB_INHDRERRORS);
421 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
422 				  ((&hdr->segments_left) -
423 				   skb_network_header(skb)));
424 		return -1;
425 	}
426 
427 	if (skb_cloned(skb)) {
428 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
429 			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
430 					IPSTATS_MIB_OUTDISCARDS);
431 			kfree_skb(skb);
432 			return -1;
433 		}
434 	}
435 
436 	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
437 
438 	hdr->segments_left--;
439 	addr = hdr->segments + hdr->segments_left;
440 
441 	skb_push(skb, sizeof(struct ipv6hdr));
442 
443 	if (skb->ip_summed == CHECKSUM_COMPLETE)
444 		seg6_update_csum(skb);
445 
446 	ipv6_hdr(skb)->daddr = *addr;
447 
448 	skb_dst_drop(skb);
449 
450 	ip6_route_input(skb);
451 
452 	if (skb_dst(skb)->error) {
453 		dst_input(skb);
454 		return -1;
455 	}
456 
457 	if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
458 		if (ipv6_hdr(skb)->hop_limit <= 1) {
459 			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
460 					IPSTATS_MIB_INHDRERRORS);
461 			icmpv6_send(skb, ICMPV6_TIME_EXCEED,
462 				    ICMPV6_EXC_HOPLIMIT, 0);
463 			kfree_skb(skb);
464 			return -1;
465 		}
466 		ipv6_hdr(skb)->hop_limit--;
467 
468 		skb_pull(skb, sizeof(struct ipv6hdr));
469 		goto looped_back;
470 	}
471 
472 	dst_input(skb);
473 
474 	return -1;
475 }
476 
477 /********************************
478   Routing header.
479  ********************************/
480 
481 /* called with rcu_read_lock() */
482 static int ipv6_rthdr_rcv(struct sk_buff *skb)
483 {
484 	struct inet6_skb_parm *opt = IP6CB(skb);
485 	struct in6_addr *addr = NULL;
486 	struct in6_addr daddr;
487 	struct inet6_dev *idev;
488 	int n, i;
489 	struct ipv6_rt_hdr *hdr;
490 	struct rt0_hdr *rthdr;
491 	struct net *net = dev_net(skb->dev);
492 	int accept_source_route = net->ipv6.devconf_all->accept_source_route;
493 
494 	idev = __in6_dev_get(skb->dev);
495 	if (idev && accept_source_route > idev->cnf.accept_source_route)
496 		accept_source_route = idev->cnf.accept_source_route;
497 
498 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
499 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
500 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
501 		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
502 				IPSTATS_MIB_INHDRERRORS);
503 		kfree_skb(skb);
504 		return -1;
505 	}
506 
507 	hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
508 
509 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
510 	    skb->pkt_type != PACKET_HOST) {
511 		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
512 				IPSTATS_MIB_INADDRERRORS);
513 		kfree_skb(skb);
514 		return -1;
515 	}
516 
517 	/* segment routing */
518 	if (hdr->type == IPV6_SRCRT_TYPE_4)
519 		return ipv6_srh_rcv(skb);
520 
521 looped_back:
522 	if (hdr->segments_left == 0) {
523 		switch (hdr->type) {
524 #if IS_ENABLED(CONFIG_IPV6_MIP6)
525 		case IPV6_SRCRT_TYPE_2:
526 			/* Silently discard type 2 header unless it was
527 			 * processed by own
528 			 */
529 			if (!addr) {
530 				__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
531 						IPSTATS_MIB_INADDRERRORS);
532 				kfree_skb(skb);
533 				return -1;
534 			}
535 			break;
536 #endif
537 		default:
538 			break;
539 		}
540 
541 		opt->lastopt = opt->srcrt = skb_network_header_len(skb);
542 		skb->transport_header += (hdr->hdrlen + 1) << 3;
543 		opt->dst0 = opt->dst1;
544 		opt->dst1 = 0;
545 		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
546 		return 1;
547 	}
548 
549 	switch (hdr->type) {
550 #if IS_ENABLED(CONFIG_IPV6_MIP6)
551 	case IPV6_SRCRT_TYPE_2:
552 		if (accept_source_route < 0)
553 			goto unknown_rh;
554 		/* Silently discard invalid RTH type 2 */
555 		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
556 			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
557 					IPSTATS_MIB_INHDRERRORS);
558 			kfree_skb(skb);
559 			return -1;
560 		}
561 		break;
562 #endif
563 	default:
564 		goto unknown_rh;
565 	}
566 
567 	/*
568 	 *	This is the routing header forwarding algorithm from
569 	 *	RFC 2460, page 16.
570 	 */
571 
572 	n = hdr->hdrlen >> 1;
573 
574 	if (hdr->segments_left > n) {
575 		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
576 				IPSTATS_MIB_INHDRERRORS);
577 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
578 				  ((&hdr->segments_left) -
579 				   skb_network_header(skb)));
580 		return -1;
581 	}
582 
583 	/* We are about to mangle packet header. Be careful!
584 	   Do not damage packets queued somewhere.
585 	 */
586 	if (skb_cloned(skb)) {
587 		/* the copy is a forwarded packet */
588 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
589 			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
590 					IPSTATS_MIB_OUTDISCARDS);
591 			kfree_skb(skb);
592 			return -1;
593 		}
594 		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
595 	}
596 
597 	if (skb->ip_summed == CHECKSUM_COMPLETE)
598 		skb->ip_summed = CHECKSUM_NONE;
599 
600 	i = n - --hdr->segments_left;
601 
602 	rthdr = (struct rt0_hdr *) hdr;
603 	addr = rthdr->addr;
604 	addr += i - 1;
605 
606 	switch (hdr->type) {
607 #if IS_ENABLED(CONFIG_IPV6_MIP6)
608 	case IPV6_SRCRT_TYPE_2:
609 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
610 				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
611 				     IPPROTO_ROUTING) < 0) {
612 			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
613 					IPSTATS_MIB_INADDRERRORS);
614 			kfree_skb(skb);
615 			return -1;
616 		}
617 		if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
618 			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
619 					IPSTATS_MIB_INADDRERRORS);
620 			kfree_skb(skb);
621 			return -1;
622 		}
623 		break;
624 #endif
625 	default:
626 		break;
627 	}
628 
629 	if (ipv6_addr_is_multicast(addr)) {
630 		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
631 				IPSTATS_MIB_INADDRERRORS);
632 		kfree_skb(skb);
633 		return -1;
634 	}
635 
636 	daddr = *addr;
637 	*addr = ipv6_hdr(skb)->daddr;
638 	ipv6_hdr(skb)->daddr = daddr;
639 
640 	skb_dst_drop(skb);
641 	ip6_route_input(skb);
642 	if (skb_dst(skb)->error) {
643 		skb_push(skb, skb->data - skb_network_header(skb));
644 		dst_input(skb);
645 		return -1;
646 	}
647 
648 	if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
649 		if (ipv6_hdr(skb)->hop_limit <= 1) {
650 			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
651 					IPSTATS_MIB_INHDRERRORS);
652 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
653 				    0);
654 			kfree_skb(skb);
655 			return -1;
656 		}
657 		ipv6_hdr(skb)->hop_limit--;
658 		goto looped_back;
659 	}
660 
661 	skb_push(skb, skb->data - skb_network_header(skb));
662 	dst_input(skb);
663 	return -1;
664 
665 unknown_rh:
666 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
667 	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
668 			  (&hdr->type) - skb_network_header(skb));
669 	return -1;
670 }
671 
672 static const struct inet6_protocol rthdr_protocol = {
673 	.handler	=	ipv6_rthdr_rcv,
674 	.flags		=	INET6_PROTO_NOPOLICY,
675 };
676 
677 static const struct inet6_protocol destopt_protocol = {
678 	.handler	=	ipv6_destopt_rcv,
679 	.flags		=	INET6_PROTO_NOPOLICY,
680 };
681 
682 static const struct inet6_protocol nodata_protocol = {
683 	.handler	=	dst_discard,
684 	.flags		=	INET6_PROTO_NOPOLICY,
685 };
686 
687 int __init ipv6_exthdrs_init(void)
688 {
689 	int ret;
690 
691 	ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING);
692 	if (ret)
693 		goto out;
694 
695 	ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
696 	if (ret)
697 		goto out_rthdr;
698 
699 	ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE);
700 	if (ret)
701 		goto out_destopt;
702 
703 out:
704 	return ret;
705 out_destopt:
706 	inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
707 out_rthdr:
708 	inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
709 	goto out;
710 };
711 
712 void ipv6_exthdrs_exit(void)
713 {
714 	inet6_del_protocol(&nodata_protocol, IPPROTO_NONE);
715 	inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
716 	inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
717 }
718 
719 /**********************************
720   Hop-by-hop options.
721  **********************************/
722 
723 /*
724  * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input().
725  */
726 static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb)
727 {
728 	return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev);
729 }
730 
731 static inline struct net *ipv6_skb_net(struct sk_buff *skb)
732 {
733 	return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev);
734 }
735 
736 /* Router Alert as of RFC 2711 */
737 
738 static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
739 {
740 	const unsigned char *nh = skb_network_header(skb);
741 
742 	if (nh[optoff + 1] == 2) {
743 		IP6CB(skb)->flags |= IP6SKB_ROUTERALERT;
744 		memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
745 		return true;
746 	}
747 	net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n",
748 			    nh[optoff + 1]);
749 	kfree_skb(skb);
750 	return false;
751 }
752 
753 /* Jumbo payload */
754 
755 static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
756 {
757 	const unsigned char *nh = skb_network_header(skb);
758 	struct net *net = ipv6_skb_net(skb);
759 	u32 pkt_len;
760 
761 	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
762 		net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
763 				    nh[optoff+1]);
764 		__IP6_INC_STATS(net, ipv6_skb_idev(skb),
765 				IPSTATS_MIB_INHDRERRORS);
766 		goto drop;
767 	}
768 
769 	pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
770 	if (pkt_len <= IPV6_MAXPLEN) {
771 		__IP6_INC_STATS(net, ipv6_skb_idev(skb),
772 				IPSTATS_MIB_INHDRERRORS);
773 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
774 		return false;
775 	}
776 	if (ipv6_hdr(skb)->payload_len) {
777 		__IP6_INC_STATS(net, ipv6_skb_idev(skb),
778 				IPSTATS_MIB_INHDRERRORS);
779 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
780 		return false;
781 	}
782 
783 	if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
784 		__IP6_INC_STATS(net, ipv6_skb_idev(skb),
785 				IPSTATS_MIB_INTRUNCATEDPKTS);
786 		goto drop;
787 	}
788 
789 	if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
790 		goto drop;
791 
792 	IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM;
793 	return true;
794 
795 drop:
796 	kfree_skb(skb);
797 	return false;
798 }
799 
800 /* CALIPSO RFC 5570 */
801 
802 static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff)
803 {
804 	const unsigned char *nh = skb_network_header(skb);
805 
806 	if (nh[optoff + 1] < 8)
807 		goto drop;
808 
809 	if (nh[optoff + 6] * 4 + 8 > nh[optoff + 1])
810 		goto drop;
811 
812 	if (!calipso_validate(skb, nh + optoff))
813 		goto drop;
814 
815 	return true;
816 
817 drop:
818 	kfree_skb(skb);
819 	return false;
820 }
821 
822 static const struct tlvtype_proc tlvprochopopt_lst[] = {
823 	{
824 		.type	= IPV6_TLV_ROUTERALERT,
825 		.func	= ipv6_hop_ra,
826 	},
827 	{
828 		.type	= IPV6_TLV_JUMBO,
829 		.func	= ipv6_hop_jumbo,
830 	},
831 	{
832 		.type	= IPV6_TLV_CALIPSO,
833 		.func	= ipv6_hop_calipso,
834 	},
835 	{ -1, }
836 };
837 
838 int ipv6_parse_hopopts(struct sk_buff *skb)
839 {
840 	struct inet6_skb_parm *opt = IP6CB(skb);
841 	struct net *net = dev_net(skb->dev);
842 	int extlen;
843 
844 	/*
845 	 * skb_network_header(skb) is equal to skb->data, and
846 	 * skb_network_header_len(skb) is always equal to
847 	 * sizeof(struct ipv6hdr) by definition of
848 	 * hop-by-hop options.
849 	 */
850 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
851 	    !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
852 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
853 fail_and_free:
854 		kfree_skb(skb);
855 		return -1;
856 	}
857 
858 	extlen = (skb_transport_header(skb)[1] + 1) << 3;
859 	if (extlen > net->ipv6.sysctl.max_hbh_opts_len)
860 		goto fail_and_free;
861 
862 	opt->flags |= IP6SKB_HOPBYHOP;
863 	if (ip6_parse_tlv(tlvprochopopt_lst, skb,
864 			  init_net.ipv6.sysctl.max_hbh_opts_cnt)) {
865 		skb->transport_header += extlen;
866 		opt = IP6CB(skb);
867 		opt->nhoff = sizeof(struct ipv6hdr);
868 		return 1;
869 	}
870 	return -1;
871 }
872 
873 /*
874  *	Creating outbound headers.
875  *
876  *	"build" functions work when skb is filled from head to tail (datagram)
877  *	"push"	functions work when headers are added from tail to head (tcp)
878  *
879  *	In both cases we assume, that caller reserved enough room
880  *	for headers.
881  */
882 
883 static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto,
884 			     struct ipv6_rt_hdr *opt,
885 			     struct in6_addr **addr_p, struct in6_addr *saddr)
886 {
887 	struct rt0_hdr *phdr, *ihdr;
888 	int hops;
889 
890 	ihdr = (struct rt0_hdr *) opt;
891 
892 	phdr = skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
893 	memcpy(phdr, ihdr, sizeof(struct rt0_hdr));
894 
895 	hops = ihdr->rt_hdr.hdrlen >> 1;
896 
897 	if (hops > 1)
898 		memcpy(phdr->addr, ihdr->addr + 1,
899 		       (hops - 1) * sizeof(struct in6_addr));
900 
901 	phdr->addr[hops - 1] = **addr_p;
902 	*addr_p = ihdr->addr;
903 
904 	phdr->rt_hdr.nexthdr = *proto;
905 	*proto = NEXTHDR_ROUTING;
906 }
907 
908 static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
909 			     struct ipv6_rt_hdr *opt,
910 			     struct in6_addr **addr_p, struct in6_addr *saddr)
911 {
912 	struct ipv6_sr_hdr *sr_phdr, *sr_ihdr;
913 	int plen, hops;
914 
915 	sr_ihdr = (struct ipv6_sr_hdr *)opt;
916 	plen = (sr_ihdr->hdrlen + 1) << 3;
917 
918 	sr_phdr = skb_push(skb, plen);
919 	memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr));
920 
921 	hops = sr_ihdr->first_segment + 1;
922 	memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1,
923 	       (hops - 1) * sizeof(struct in6_addr));
924 
925 	sr_phdr->segments[0] = **addr_p;
926 	*addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
927 
928 	if (sr_ihdr->hdrlen > hops * 2) {
929 		int tlvs_offset, tlvs_length;
930 
931 		tlvs_offset = (1 + hops * 2) << 3;
932 		tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
933 		memcpy((char *)sr_phdr + tlvs_offset,
934 		       (char *)sr_ihdr + tlvs_offset, tlvs_length);
935 	}
936 
937 #ifdef CONFIG_IPV6_SEG6_HMAC
938 	if (sr_has_hmac(sr_phdr)) {
939 		struct net *net = NULL;
940 
941 		if (skb->dev)
942 			net = dev_net(skb->dev);
943 		else if (skb->sk)
944 			net = sock_net(skb->sk);
945 
946 		WARN_ON(!net);
947 
948 		if (net)
949 			seg6_push_hmac(net, saddr, sr_phdr);
950 	}
951 #endif
952 
953 	sr_phdr->nexthdr = *proto;
954 	*proto = NEXTHDR_ROUTING;
955 }
956 
957 static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
958 			    struct ipv6_rt_hdr *opt,
959 			    struct in6_addr **addr_p, struct in6_addr *saddr)
960 {
961 	switch (opt->type) {
962 	case IPV6_SRCRT_TYPE_0:
963 	case IPV6_SRCRT_STRICT:
964 	case IPV6_SRCRT_TYPE_2:
965 		ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
966 		break;
967 	case IPV6_SRCRT_TYPE_4:
968 		ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr);
969 		break;
970 	default:
971 		break;
972 	}
973 }
974 
975 static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
976 {
977 	struct ipv6_opt_hdr *h = skb_push(skb, ipv6_optlen(opt));
978 
979 	memcpy(h, opt, ipv6_optlen(opt));
980 	h->nexthdr = *proto;
981 	*proto = type;
982 }
983 
984 void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
985 			  u8 *proto,
986 			  struct in6_addr **daddr, struct in6_addr *saddr)
987 {
988 	if (opt->srcrt) {
989 		ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr);
990 		/*
991 		 * IPV6_RTHDRDSTOPTS is ignored
992 		 * unless IPV6_RTHDR is set (RFC3542).
993 		 */
994 		if (opt->dst0opt)
995 			ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
996 	}
997 	if (opt->hopopt)
998 		ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
999 }
1000 
1001 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
1002 {
1003 	if (opt->dst1opt)
1004 		ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
1005 }
1006 EXPORT_SYMBOL(ipv6_push_frag_opts);
1007 
1008 struct ipv6_txoptions *
1009 ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
1010 {
1011 	struct ipv6_txoptions *opt2;
1012 
1013 	opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
1014 	if (opt2) {
1015 		long dif = (char *)opt2 - (char *)opt;
1016 		memcpy(opt2, opt, opt->tot_len);
1017 		if (opt2->hopopt)
1018 			*((char **)&opt2->hopopt) += dif;
1019 		if (opt2->dst0opt)
1020 			*((char **)&opt2->dst0opt) += dif;
1021 		if (opt2->dst1opt)
1022 			*((char **)&opt2->dst1opt) += dif;
1023 		if (opt2->srcrt)
1024 			*((char **)&opt2->srcrt) += dif;
1025 		refcount_set(&opt2->refcnt, 1);
1026 	}
1027 	return opt2;
1028 }
1029 EXPORT_SYMBOL_GPL(ipv6_dup_options);
1030 
1031 static int ipv6_renew_option(void *ohdr,
1032 			     struct ipv6_opt_hdr __user *newopt, int newoptlen,
1033 			     int inherit,
1034 			     struct ipv6_opt_hdr **hdr,
1035 			     char **p)
1036 {
1037 	if (inherit) {
1038 		if (ohdr) {
1039 			memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
1040 			*hdr = (struct ipv6_opt_hdr *)*p;
1041 			*p += CMSG_ALIGN(ipv6_optlen(*hdr));
1042 		}
1043 	} else {
1044 		if (newopt) {
1045 			if (copy_from_user(*p, newopt, newoptlen))
1046 				return -EFAULT;
1047 			*hdr = (struct ipv6_opt_hdr *)*p;
1048 			if (ipv6_optlen(*hdr) > newoptlen)
1049 				return -EINVAL;
1050 			*p += CMSG_ALIGN(newoptlen);
1051 		}
1052 	}
1053 	return 0;
1054 }
1055 
1056 /**
1057  * ipv6_renew_options - replace a specific ext hdr with a new one.
1058  *
1059  * @sk: sock from which to allocate memory
1060  * @opt: original options
1061  * @newtype: option type to replace in @opt
1062  * @newopt: new option of type @newtype to replace (user-mem)
1063  * @newoptlen: length of @newopt
1064  *
1065  * Returns a new set of options which is a copy of @opt with the
1066  * option type @newtype replaced with @newopt.
1067  *
1068  * @opt may be NULL, in which case a new set of options is returned
1069  * containing just @newopt.
1070  *
1071  * @newopt may be NULL, in which case the specified option type is
1072  * not copied into the new set of options.
1073  *
1074  * The new set of options is allocated from the socket option memory
1075  * buffer of @sk.
1076  */
1077 struct ipv6_txoptions *
1078 ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
1079 		   int newtype,
1080 		   struct ipv6_opt_hdr __user *newopt, int newoptlen)
1081 {
1082 	int tot_len = 0;
1083 	char *p;
1084 	struct ipv6_txoptions *opt2;
1085 	int err;
1086 
1087 	if (opt) {
1088 		if (newtype != IPV6_HOPOPTS && opt->hopopt)
1089 			tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt));
1090 		if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt)
1091 			tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt));
1092 		if (newtype != IPV6_RTHDR && opt->srcrt)
1093 			tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt));
1094 		if (newtype != IPV6_DSTOPTS && opt->dst1opt)
1095 			tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
1096 	}
1097 
1098 	if (newopt && newoptlen)
1099 		tot_len += CMSG_ALIGN(newoptlen);
1100 
1101 	if (!tot_len)
1102 		return NULL;
1103 
1104 	tot_len += sizeof(*opt2);
1105 	opt2 = sock_kmalloc(sk, tot_len, GFP_ATOMIC);
1106 	if (!opt2)
1107 		return ERR_PTR(-ENOBUFS);
1108 
1109 	memset(opt2, 0, tot_len);
1110 	refcount_set(&opt2->refcnt, 1);
1111 	opt2->tot_len = tot_len;
1112 	p = (char *)(opt2 + 1);
1113 
1114 	err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
1115 				newtype != IPV6_HOPOPTS,
1116 				&opt2->hopopt, &p);
1117 	if (err)
1118 		goto out;
1119 
1120 	err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
1121 				newtype != IPV6_RTHDRDSTOPTS,
1122 				&opt2->dst0opt, &p);
1123 	if (err)
1124 		goto out;
1125 
1126 	err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
1127 				newtype != IPV6_RTHDR,
1128 				(struct ipv6_opt_hdr **)&opt2->srcrt, &p);
1129 	if (err)
1130 		goto out;
1131 
1132 	err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
1133 				newtype != IPV6_DSTOPTS,
1134 				&opt2->dst1opt, &p);
1135 	if (err)
1136 		goto out;
1137 
1138 	opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) +
1139 			  (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) +
1140 			  (opt2->srcrt ? ipv6_optlen(opt2->srcrt) : 0);
1141 	opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0);
1142 
1143 	return opt2;
1144 out:
1145 	sock_kfree_s(sk, opt2, opt2->tot_len);
1146 	return ERR_PTR(err);
1147 }
1148 
1149 /**
1150  * ipv6_renew_options_kern - replace a specific ext hdr with a new one.
1151  *
1152  * @sk: sock from which to allocate memory
1153  * @opt: original options
1154  * @newtype: option type to replace in @opt
1155  * @newopt: new option of type @newtype to replace (kernel-mem)
1156  * @newoptlen: length of @newopt
1157  *
1158  * See ipv6_renew_options().  The difference is that @newopt is
1159  * kernel memory, rather than user memory.
1160  */
1161 struct ipv6_txoptions *
1162 ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt,
1163 			int newtype, struct ipv6_opt_hdr *newopt,
1164 			int newoptlen)
1165 {
1166 	struct ipv6_txoptions *ret_val;
1167 	const mm_segment_t old_fs = get_fs();
1168 
1169 	set_fs(KERNEL_DS);
1170 	ret_val = ipv6_renew_options(sk, opt, newtype,
1171 				     (struct ipv6_opt_hdr __user *)newopt,
1172 				     newoptlen);
1173 	set_fs(old_fs);
1174 	return ret_val;
1175 }
1176 
1177 struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
1178 					  struct ipv6_txoptions *opt)
1179 {
1180 	/*
1181 	 * ignore the dest before srcrt unless srcrt is being included.
1182 	 * --yoshfuji
1183 	 */
1184 	if (opt && opt->dst0opt && !opt->srcrt) {
1185 		if (opt_space != opt) {
1186 			memcpy(opt_space, opt, sizeof(*opt_space));
1187 			opt = opt_space;
1188 		}
1189 		opt->opt_nflen -= ipv6_optlen(opt->dst0opt);
1190 		opt->dst0opt = NULL;
1191 	}
1192 
1193 	return opt;
1194 }
1195 EXPORT_SYMBOL_GPL(ipv6_fixup_options);
1196 
1197 /**
1198  * fl6_update_dst - update flowi destination address with info given
1199  *                  by srcrt option, if any.
1200  *
1201  * @fl6: flowi6 for which daddr is to be updated
1202  * @opt: struct ipv6_txoptions in which to look for srcrt opt
1203  * @orig: copy of original daddr address if modified
1204  *
1205  * Returns NULL if no txoptions or no srcrt, otherwise returns orig
1206  * and initial value of fl6->daddr set in orig
1207  */
1208 struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
1209 				const struct ipv6_txoptions *opt,
1210 				struct in6_addr *orig)
1211 {
1212 	if (!opt || !opt->srcrt)
1213 		return NULL;
1214 
1215 	*orig = fl6->daddr;
1216 
1217 	switch (opt->srcrt->type) {
1218 	case IPV6_SRCRT_TYPE_0:
1219 	case IPV6_SRCRT_STRICT:
1220 	case IPV6_SRCRT_TYPE_2:
1221 		fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
1222 		break;
1223 	case IPV6_SRCRT_TYPE_4:
1224 	{
1225 		struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt;
1226 
1227 		fl6->daddr = srh->segments[srh->segments_left];
1228 		break;
1229 	}
1230 	default:
1231 		return NULL;
1232 	}
1233 
1234 	return orig;
1235 }
1236 EXPORT_SYMBOL_GPL(fl6_update_dst);
1237