xref: /openbmc/linux/net/ipv6/icmp.c (revision 842df073)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to an ICMP.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to send parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/protocol.h>
61 #include <net/raw.h>
62 #include <net/rawv6.h>
63 #include <net/transp_v6.h>
64 #include <net/ip6_route.h>
65 #include <net/addrconf.h>
66 #include <net/icmp.h>
67 #include <net/xfrm.h>
68 #include <net/inet_common.h>
69 
70 #include <asm/uaccess.h>
71 
72 /*
73  *	The ICMP socket(s). This is the most convenient way to flow control
74  *	our ICMP output as well as maintain a clean interface throughout
75  *	all layers. All Socketless IP sends will soon be gone.
76  *
77  *	On SMP we have one ICMP socket per-cpu.
78  */
/* Return this CPU's ICMPv6 control socket for @net.  Caller must have
 * the CPU pinned (e.g. BHs disabled, as icmpv6_xmit_lock() does) for
 * smp_processor_id() to be stable.
 */
static inline struct sock *icmpv6_sk(struct net *net)
{
	return net->ipv6.icmp_sk[smp_processor_id()];
}
83 
84 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
85 		       u8 type, u8 code, int offset, __be32 info)
86 {
87 	struct net *net = dev_net(skb->dev);
88 
89 	if (type == ICMPV6_PKT_TOOBIG)
90 		ip6_update_pmtu(skb, net, info, 0, 0);
91 	else if (type == NDISC_REDIRECT)
92 		ip6_redirect(skb, net, 0, 0);
93 }
94 
95 static int icmpv6_rcv(struct sk_buff *skb);
96 
/* Registration record for IPPROTO_ICMPV6: input bypasses the XFRM
 * policy check (NOPOLICY) and ICMPv6 is a terminal header (FINAL).
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
102 
/* Acquire this CPU's ICMPv6 control socket with BHs disabled.
 * spin_trylock (not spin_lock) is deliberate: the transmit path below
 * can re-enter this code (see comment in the body), and blocking here
 * would deadlock.  Returns NULL when the lock is already held; the
 * caller must then simply skip sending.  Pairs with
 * icmpv6_xmit_unlock().
 */
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	local_bh_disable();

	sk = icmpv6_sk(net);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		local_bh_enable();
		return NULL;
	}
	return sk;
}
120 
/* Drop the control-socket lock and re-enable BHs in one step:
 * spin_unlock_bh() undoes both the trylock and the local_bh_disable()
 * taken in icmpv6_xmit_lock().
 */
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock_bh(&sk->sk_lock.slock);
}
125 
/*
 * Slightly more convenient version of icmpv6_send:
 * send a Parameter Problem error pointing at offset @pos in @skb,
 * then free @skb.  Note: this CONSUMES the skb — callers must not
 * touch it afterwards.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
	kfree_skb(skb);
}
134 
/*
 * Figure out, may we reply to this packet with icmp error.
 *
 * We do not reply, if:
 *	- it was icmp error message.
 *	- it is truncated, so that it is known, that protocol is ICMPV6
 *	  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	/* Byte offset of the first header after the fixed IPv6 header. */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	/* Truncated before the upper-layer header even starts. */
	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	/* Malformed exthdr chain: cannot prove it's ICMPv6, allow reply. */
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		/* Unreadable type byte, or an ICMPv6 *error* message
		 * (INFOMSG bit clear): never reply to it.
		 */
		if (tp == NULL ||
		    !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}
170 
/*
 * Check the ICMP output rate limit.  Returns true when the message may
 * be sent.  Informational messages and PKT_TOOBIG are never limited;
 * everything else is rate-limited per destination via the inet_peer
 * token bucket, except loopback destinations which are always allowed.
 */
static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
				      struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct net *net = sock_net(sk);
	bool res = false;

	/* Informational messages are not limited. */
	if (type & ICMPV6_INFOMSG_MASK)
		return true;

	/* Do not limit pmtu discovery, it would break it. */
	if (type == ICMPV6_PKT_TOOBIG)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* create=1: allocate a peer entry if none exists yet. */
		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	/* ip6_route_output() always returns a referenced dst. */
	dst_release(dst);
	return res;
}
217 
218 /*
219  *	an inline helper for the "simple" if statement below
220  *	checks if parameter problem report is caused by an
221  *	unrecognized IPv6 option that has the Option Type
222  *	highest-order two bits set to 10
223  */
224 
225 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
226 {
227 	u8 _optval, *op;
228 
229 	offset += skb_network_offset(skb);
230 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
231 	if (op == NULL)
232 		return true;
233 	return (*op & 0xC0) == 0x80;
234 }
235 
236 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
237 {
238 	struct sk_buff *skb;
239 	struct icmp6hdr *icmp6h;
240 	int err = 0;
241 
242 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
243 		goto out;
244 
245 	icmp6h = icmp6_hdr(skb);
246 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
247 	icmp6h->icmp6_cksum = 0;
248 
249 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
250 		skb->csum = csum_partial(icmp6h,
251 					sizeof(struct icmp6hdr), skb->csum);
252 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
253 						      &fl6->daddr,
254 						      len, fl6->flowi6_proto,
255 						      skb->csum);
256 	} else {
257 		__wsum tmp_csum = 0;
258 
259 		skb_queue_walk(&sk->sk_write_queue, skb) {
260 			tmp_csum = csum_add(tmp_csum, skb->csum);
261 		}
262 
263 		tmp_csum = csum_partial(icmp6h,
264 					sizeof(struct icmp6hdr), tmp_csum);
265 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
266 						      &fl6->daddr,
267 						      len, fl6->flowi6_proto,
268 						      tmp_csum);
269 	}
270 	ip6_push_pending_frames(sk);
271 out:
272 	return err;
273 }
274 
/* Context passed from icmpv6_send()/icmpv6_echo_reply() to the
 * icmpv6_getfrag() callback via ip6_append_data().
 */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* the original packet being quoted/replied to */
	int		offset;	/* byte offset in skb where copying starts */
	uint8_t		type;	/* ICMPv6 type of the message being built */
};
280 
/* ip6_append_data() getfrag callback: copy @len bytes of the original
 * packet into the outgoing skb while accumulating the checksum.  For
 * error messages (INFOMSG bit clear) the original skb's conntrack
 * entry is also attached to the reply.  Always succeeds (returns 0).
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum = 0;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
294 
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: when the offending packet carried a parsed Home Address
 * destination option (opt->dsthao), swap the IPv6 source address with
 * the address stored in the HAO option — presumably so the error is
 * generated against the mobile node's home address (RFC 3775 handling;
 * confirm against the MIPv6 input path).  No-op without CONFIG_IPV6_MIP6.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
318 
/* Resolve the (possibly IPsec-transformed) route for an outgoing
 * ICMPv6 error.  Returns a referenced dst_entry or an ERR_PTR().
 *
 * Strategy: do a plain route + XFRM lookup first; if XFRM denies it
 * with -EPERM, retry with a flow decoded from the *reverse* of the
 * quoted packet's session (the XFRM ICMP special case), falling back
 * to the untransformed route if the retry fails.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
					     struct sock *sk, struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;	/* transformed route: done */
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;	/* policy block: try reverse flow below */
		else
			return dst;	/* hard error: propagate the ERR_PTR */
	}

	/* Build a flow from the reverse of the quoted packet's session. */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Note: the success path above also falls through to here. */
	if (dst)
		return dst;	/* untransformed (or retried) route */
	return ERR_PTR(err);
}
380 
381 /*
382  *	Send an ICMP message in response to a packet in error
383  */
384 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
385 {
386 	struct net *net = dev_net(skb->dev);
387 	struct inet6_dev *idev = NULL;
388 	struct ipv6hdr *hdr = ipv6_hdr(skb);
389 	struct sock *sk;
390 	struct ipv6_pinfo *np;
391 	const struct in6_addr *saddr = NULL;
392 	struct dst_entry *dst;
393 	struct icmp6hdr tmp_hdr;
394 	struct flowi6 fl6;
395 	struct icmpv6_msg msg;
396 	int iif = 0;
397 	int addr_type = 0;
398 	int len;
399 	int hlimit;
400 	int err = 0;
401 
402 	if ((u8 *)hdr < skb->head ||
403 	    (skb->network_header + sizeof(*hdr)) > skb->tail)
404 		return;
405 
406 	/*
407 	 *	Make sure we respect the rules
408 	 *	i.e. RFC 1885 2.4(e)
409 	 *	Rule (e.1) is enforced by not using icmpv6_send
410 	 *	in any code that processes icmp errors.
411 	 */
412 	addr_type = ipv6_addr_type(&hdr->daddr);
413 
414 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
415 		saddr = &hdr->daddr;
416 
417 	/*
418 	 *	Dest addr check
419 	 */
420 
421 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
422 		if (type != ICMPV6_PKT_TOOBIG &&
423 		    !(type == ICMPV6_PARAMPROB &&
424 		      code == ICMPV6_UNK_OPTION &&
425 		      (opt_unrec(skb, info))))
426 			return;
427 
428 		saddr = NULL;
429 	}
430 
431 	addr_type = ipv6_addr_type(&hdr->saddr);
432 
433 	/*
434 	 *	Source addr check
435 	 */
436 
437 	if (__ipv6_addr_needs_scope_id(addr_type))
438 		iif = skb->dev->ifindex;
439 
440 	/*
441 	 *	Must not send error if the source does not uniquely
442 	 *	identify a single node (RFC2463 Section 2.4).
443 	 *	We check unspecified / multicast addresses here,
444 	 *	and anycast addresses will be checked later.
445 	 */
446 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
447 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
448 		return;
449 	}
450 
451 	/*
452 	 *	Never answer to a ICMP packet.
453 	 */
454 	if (is_ineligible(skb)) {
455 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
456 		return;
457 	}
458 
459 	mip6_addr_swap(skb);
460 
461 	memset(&fl6, 0, sizeof(fl6));
462 	fl6.flowi6_proto = IPPROTO_ICMPV6;
463 	fl6.daddr = hdr->saddr;
464 	if (saddr)
465 		fl6.saddr = *saddr;
466 	fl6.flowi6_oif = iif;
467 	fl6.fl6_icmp_type = type;
468 	fl6.fl6_icmp_code = code;
469 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
470 
471 	sk = icmpv6_xmit_lock(net);
472 	if (sk == NULL)
473 		return;
474 	np = inet6_sk(sk);
475 
476 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
477 		goto out;
478 
479 	tmp_hdr.icmp6_type = type;
480 	tmp_hdr.icmp6_code = code;
481 	tmp_hdr.icmp6_cksum = 0;
482 	tmp_hdr.icmp6_pointer = htonl(info);
483 
484 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
485 		fl6.flowi6_oif = np->mcast_oif;
486 	else if (!fl6.flowi6_oif)
487 		fl6.flowi6_oif = np->ucast_oif;
488 
489 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
490 	if (IS_ERR(dst))
491 		goto out;
492 
493 	if (ipv6_addr_is_multicast(&fl6.daddr))
494 		hlimit = np->mcast_hops;
495 	else
496 		hlimit = np->hop_limit;
497 	if (hlimit < 0)
498 		hlimit = ip6_dst_hoplimit(dst);
499 
500 	msg.skb = skb;
501 	msg.offset = skb_network_offset(skb);
502 	msg.type = type;
503 
504 	len = skb->len - msg.offset;
505 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
506 	if (len < 0) {
507 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
508 		goto out_dst_release;
509 	}
510 
511 	rcu_read_lock();
512 	idev = __in6_dev_get(skb->dev);
513 
514 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
515 			      len + sizeof(struct icmp6hdr),
516 			      sizeof(struct icmp6hdr), hlimit,
517 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
518 			      MSG_DONTWAIT, np->dontfrag);
519 	if (err) {
520 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
521 		ip6_flush_pending_frames(sk);
522 	} else {
523 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
524 						 len + sizeof(struct icmp6hdr));
525 	}
526 	rcu_read_unlock();
527 out_dst_release:
528 	dst_release(dst);
529 out:
530 	icmpv6_xmit_unlock(sk);
531 }
532 EXPORT_SYMBOL(icmpv6_send);
533 
/* Build and send an Echo Reply for an inbound Echo Request in @skb.
 * The reply echoes the request's full payload (identifier/sequence are
 * carried over by copying the request's ICMPv6 header into tmp_hdr and
 * only changing the type).
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	int err = 0;
	int hlimit;

	/* Source the reply from the request's destination address, but
	 * only when that was a unicast address of ours; otherwise let
	 * routing pick the source.
	 */
	saddr = &ipv6_hdr(skb)->daddr;

	if (!ipv6_unicast_destination(skb))
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = skb->dev->ifindex;
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	sk = icmpv6_xmit_lock(net);
	if (sk == NULL)
		return;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	err = ip6_dst_lookup(sk, &dst, &fl6);
	if (err)
		goto out;
	/* NOTE(review): on error xfrm_lookup() appears responsible for
	 * the original dst's reference — confirm no leak on this path.
	 */
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	if (ipv6_addr_is_multicast(&fl6.daddr))
		hlimit = np->mcast_hops;
	else
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;	/* echo the whole remaining packet */
	msg.type = ICMPV6_ECHO_REPLY;

	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
				(struct rt6_info *)dst, MSG_DONTWAIT,
				np->dontfrag);

	if (err) {
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 skb->len + sizeof(struct icmp6hdr));
	}
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
}
612 
/* Deliver an inbound ICMPv6 error to the upper-layer protocol it quotes:
 * skip the quoted IPv6 header plus any extension headers, then invoke
 * the matching inet6_protocol err_handler and notify raw sockets bound
 * to that protocol.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	/* Need at least the quoted IPv6 header. */
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		return;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset<0)
			return;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check the header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		return;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	rcu_read_lock();
	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
	rcu_read_unlock();

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
}
653 
/*
 *	Handle icmp messages: the protocol receive handler registered
 *	for IPPROTO_ICMPV6 via icmpv6_protocol.  Always consumes the
 *	skb and returns 0.
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	/* XFRM policy said "no": only let the packet through if the
	 * last transform is flagged XFRM_STATE_ICMP, and then re-check
	 * policy against the *quoted* inner packet (reverse direction).
	 */
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the quoted
		 * packet for the reverse policy check, then restore it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	/* Perform checksum. */
	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
				     skb->csum))
			break;
		/* fall through */
	case CHECKSUM_NONE:
		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
					     IPPROTO_ICMPV6, 0));
		if (__skb_checksum_complete(skb)) {
			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
				       saddr, daddr);
			goto discard_it;
		}
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		/* we couldn't care less */
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		/* pskb_may_pull may reallocate; refresh hdr afterwards. */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/*
		 *	Drop through to notify
		 */

	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	/* Known types we deliberately ignore here (some are handled by
	 * other subsystems or userspace via raw sockets).
	 */
	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");

		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	kfree_skb(skb);
	return 0;

discard_it:
	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}
796 
797 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
798 		      u8 type,
799 		      const struct in6_addr *saddr,
800 		      const struct in6_addr *daddr,
801 		      int oif)
802 {
803 	memset(fl6, 0, sizeof(*fl6));
804 	fl6->saddr = *saddr;
805 	fl6->daddr = *daddr;
806 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
807 	fl6->fl6_icmp_type	= type;
808 	fl6->fl6_icmp_code	= 0;
809 	fl6->flowi6_oif		= oif;
810 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
811 }
812 
/*
 * Special lockdep class for the ICMPv6 control sockets' sk_dst_lock;
 * applied per-socket in icmpv6_sk_init() (see the comment there for
 * why these sockets need their own class).
 */
static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
817 
818 static int __net_init icmpv6_sk_init(struct net *net)
819 {
820 	struct sock *sk;
821 	int err, i, j;
822 
823 	net->ipv6.icmp_sk =
824 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
825 	if (net->ipv6.icmp_sk == NULL)
826 		return -ENOMEM;
827 
828 	for_each_possible_cpu(i) {
829 		err = inet_ctl_sock_create(&sk, PF_INET6,
830 					   SOCK_RAW, IPPROTO_ICMPV6, net);
831 		if (err < 0) {
832 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
833 			       err);
834 			goto fail;
835 		}
836 
837 		net->ipv6.icmp_sk[i] = sk;
838 
839 		/*
840 		 * Split off their lock-class, because sk->sk_dst_lock
841 		 * gets used from softirqs, which is safe for
842 		 * __icmpv6_sk (because those never get directly used
843 		 * via userspace syscalls), but unsafe for normal sockets.
844 		 */
845 		lockdep_set_class(&sk->sk_dst_lock,
846 				  &icmpv6_socket_sk_dst_lock_key);
847 
848 		/* Enough space for 2 64K ICMP packets, including
849 		 * sk_buff struct overhead.
850 		 */
851 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
852 	}
853 	return 0;
854 
855  fail:
856 	for (j = 0; j < i; j++)
857 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
858 	kfree(net->ipv6.icmp_sk);
859 	return err;
860 }
861 
862 static void __net_exit icmpv6_sk_exit(struct net *net)
863 {
864 	int i;
865 
866 	for_each_possible_cpu(i) {
867 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
868 	}
869 	kfree(net->ipv6.icmp_sk);
870 }
871 
/* Per-network-namespace setup/teardown of the ICMPv6 control sockets. */
static struct pernet_operations icmpv6_sk_ops = {
       .init = icmpv6_sk_init,
       .exit = icmpv6_sk_exit,
};
876 
/* Boot-time entry point: register the per-net control sockets, then
 * hook the ICMPv6 protocol handler.  Returns 0 or a negative errno.
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	/* inet6_add_protocol() reports only generic failure; surface it
	 * to our caller as -EAGAIN.
	 */
	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;
	return 0;

fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}
895 
896 void icmpv6_cleanup(void)
897 {
898 	unregister_pernet_subsys(&icmpv6_sk_ops);
899 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
900 }
901 
902 
/* Mapping from ICMPV6_DEST_UNREACH codes (0..ICMPV6_PORT_UNREACH) to
 * the errno reported to sockets and whether the error is fatal;
 * indexed directly by the ICMPv6 code in icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;	/* errno delivered to the socket */
	int fatal;	/* non-zero: error is fatal to the connection */
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
};
928 
929 int icmpv6_err_convert(u8 type, u8 code, int *err)
930 {
931 	int fatal = 0;
932 
933 	*err = EPROTO;
934 
935 	switch (type) {
936 	case ICMPV6_DEST_UNREACH:
937 		fatal = 1;
938 		if (code <= ICMPV6_PORT_UNREACH) {
939 			*err  = tab_unreach[code].err;
940 			fatal = tab_unreach[code].fatal;
941 		}
942 		break;
943 
944 	case ICMPV6_PKT_TOOBIG:
945 		*err = EMSGSIZE;
946 		break;
947 
948 	case ICMPV6_PARAMPROB:
949 		*err = EPROTO;
950 		fatal = 1;
951 		break;
952 
953 	case ICMPV6_TIME_EXCEED:
954 		*err = EHOSTUNREACH;
955 		break;
956 	}
957 
958 	return fatal;
959 }
960 EXPORT_SYMBOL(icmpv6_err_convert);
961 
962 #ifdef CONFIG_SYSCTL
/* sysctl template for net.ipv6.icmp; the .data pointer is re-targeted
 * at each namespace's own icmpv6_time in ipv6_icmp_sysctl_init().
 */
ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },
};
973 
974 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
975 {
976 	struct ctl_table *table;
977 
978 	table = kmemdup(ipv6_icmp_table_template,
979 			sizeof(ipv6_icmp_table_template),
980 			GFP_KERNEL);
981 
982 	if (table)
983 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
984 
985 	return table;
986 }
987 #endif
988 
989