xref: /openbmc/linux/net/ipv6/icmp.c (revision 4cdf507d)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure parameter problem is sent for
 *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 
72 #include <asm/uaccess.h>
73 
74 /*
75  *	The ICMP socket(s). This is the most convenient way to flow control
76  *	our ICMP output as well as maintain a clean interface throughout
77  *	all layers. All Socketless IP sends will soon be gone.
78  *
79  *	On SMP we have one ICMP socket per-cpu.
80  */
81 static inline struct sock *icmpv6_sk(struct net *net)
82 {
83 	return net->ipv6.icmp_sk[smp_processor_id()];
84 }
85 
86 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
87 		       u8 type, u8 code, int offset, __be32 info)
88 {
89 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
90 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
91 	struct net *net = dev_net(skb->dev);
92 
93 	if (type == ICMPV6_PKT_TOOBIG)
94 		ip6_update_pmtu(skb, net, info, 0, 0);
95 	else if (type == NDISC_REDIRECT)
96 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
97 
98 	if (!(type & ICMPV6_INFOMSG_MASK))
99 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
100 			ping_err(skb, offset, info);
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
112 {
113 	struct sock *sk;
114 
115 	local_bh_disable();
116 
117 	sk = icmpv6_sk(net);
118 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
119 		/* This can happen if the output path (f.e. SIT or
120 		 * ip6ip6 tunnel) signals dst_link_failure() for an
121 		 * outgoing ICMP6 packet.
122 		 */
123 		local_bh_enable();
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
129 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
130 {
131 	spin_unlock_bh(&sk->sk_lock.slock);
132 }
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 	int len = skb->len - ptr;
149 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 	__be16 frag_off;
151 
152 	if (len < 0)
153 		return true;
154 
155 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 	if (ptr < 0)
157 		return false;
158 	if (nexthdr == IPPROTO_ICMPV6) {
159 		u8 _type, *tp;
160 		tp = skb_header_pointer(skb,
161 			ptr+offsetof(struct icmp6hdr, icmp6_type),
162 			sizeof(_type), &_type);
163 		if (tp == NULL ||
164 		    !(*tp & ICMPV6_INFOMSG_MASK))
165 			return true;
166 	}
167 	return false;
168 }
169 
170 /*
171  * Check the ICMP output rate limit
172  */
173 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
174 			       struct flowi6 *fl6)
175 {
176 	struct net *net = sock_net(sk);
177 	struct dst_entry *dst;
178 	bool res = false;
179 
180 	/* Informational messages are not limited. */
181 	if (type & ICMPV6_INFOMSG_MASK)
182 		return true;
183 
184 	/* Do not limit pmtu discovery, it would break it. */
185 	if (type == ICMPV6_PKT_TOOBIG)
186 		return true;
187 
188 	/*
189 	 * Look up the output route.
190 	 * XXX: perhaps the expire for routing entries cloned by
191 	 * this lookup should be more aggressive (not longer than timeout).
192 	 */
193 	dst = ip6_route_output(net, sk, fl6);
194 	if (dst->error) {
195 		IP6_INC_STATS(net, ip6_dst_idev(dst),
196 			      IPSTATS_MIB_OUTNOROUTES);
197 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
198 		res = true;
199 	} else {
200 		struct rt6_info *rt = (struct rt6_info *)dst;
201 		int tmo = net->ipv6.sysctl.icmpv6_time;
202 
203 		/* Give more bandwidth to wider prefixes. */
204 		if (rt->rt6i_dst.plen < 128)
205 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
206 
207 		if (icmp_global_allow()) {
208 			struct inet_peer *peer;
209 
210 			peer = inet_getpeer_v6(net->ipv6.peers,
211 					       &rt->rt6i_dst.addr, 1);
212 			res = inet_peer_xrlim_allow(peer, tmo);
213 			if (peer)
214 				inet_putpeer(peer);
215 		}
216 	}
217 	dst_release(dst);
218 	return res;
219 }
220 
221 /*
222  *	an inline helper for the "simple" if statement below
223  *	checks if parameter problem report is caused by an
224  *	unrecognized IPv6 option that has the Option Type
225  *	highest-order two bits set to 10
226  */
227 
228 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
229 {
230 	u8 _optval, *op;
231 
232 	offset += skb_network_offset(skb);
233 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
234 	if (op == NULL)
235 		return true;
236 	return (*op & 0xC0) == 0x80;
237 }
238 
239 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
240 			       struct icmp6hdr *thdr, int len)
241 {
242 	struct sk_buff *skb;
243 	struct icmp6hdr *icmp6h;
244 	int err = 0;
245 
246 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
247 		goto out;
248 
249 	icmp6h = icmp6_hdr(skb);
250 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
251 	icmp6h->icmp6_cksum = 0;
252 
253 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
254 		skb->csum = csum_partial(icmp6h,
255 					sizeof(struct icmp6hdr), skb->csum);
256 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
257 						      &fl6->daddr,
258 						      len, fl6->flowi6_proto,
259 						      skb->csum);
260 	} else {
261 		__wsum tmp_csum = 0;
262 
263 		skb_queue_walk(&sk->sk_write_queue, skb) {
264 			tmp_csum = csum_add(tmp_csum, skb->csum);
265 		}
266 
267 		tmp_csum = csum_partial(icmp6h,
268 					sizeof(struct icmp6hdr), tmp_csum);
269 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
270 						      &fl6->daddr,
271 						      len, fl6->flowi6_proto,
272 						      tmp_csum);
273 	}
274 	ip6_push_pending_frames(sk);
275 out:
276 	return err;
277 }
278 
279 struct icmpv6_msg {
280 	struct sk_buff	*skb;
281 	int		offset;
282 	uint8_t		type;
283 };
284 
285 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
286 {
287 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
288 	struct sk_buff *org_skb = msg->skb;
289 	__wsum csum = 0;
290 
291 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
292 				      to, len, csum);
293 	skb->csum = csum_block_add(skb->csum, csum, odd);
294 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
295 		nf_ct_attach(skb, org_skb);
296 	return 0;
297 }
298 
299 #if IS_ENABLED(CONFIG_IPV6_MIP6)
300 static void mip6_addr_swap(struct sk_buff *skb)
301 {
302 	struct ipv6hdr *iph = ipv6_hdr(skb);
303 	struct inet6_skb_parm *opt = IP6CB(skb);
304 	struct ipv6_destopt_hao *hao;
305 	struct in6_addr tmp;
306 	int off;
307 
308 	if (opt->dsthao) {
309 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
310 		if (likely(off >= 0)) {
311 			hao = (struct ipv6_destopt_hao *)
312 					(skb_network_header(skb) + off);
313 			tmp = iph->saddr;
314 			iph->saddr = hao->addr;
315 			hao->addr = tmp;
316 		}
317 	}
318 }
319 #else
320 static inline void mip6_addr_swap(struct sk_buff *skb) {}
321 #endif
322 
323 static struct dst_entry *icmpv6_route_lookup(struct net *net,
324 					     struct sk_buff *skb,
325 					     struct sock *sk,
326 					     struct flowi6 *fl6)
327 {
328 	struct dst_entry *dst, *dst2;
329 	struct flowi6 fl2;
330 	int err;
331 
332 	err = ip6_dst_lookup(sk, &dst, fl6);
333 	if (err)
334 		return ERR_PTR(err);
335 
336 	/*
337 	 * We won't send icmp if the destination is known
338 	 * anycast.
339 	 */
340 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
341 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
342 		dst_release(dst);
343 		return ERR_PTR(-EINVAL);
344 	}
345 
346 	/* No need to clone since we're just using its address. */
347 	dst2 = dst;
348 
349 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
350 	if (!IS_ERR(dst)) {
351 		if (dst != dst2)
352 			return dst;
353 	} else {
354 		if (PTR_ERR(dst) == -EPERM)
355 			dst = NULL;
356 		else
357 			return dst;
358 	}
359 
360 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
361 	if (err)
362 		goto relookup_failed;
363 
364 	err = ip6_dst_lookup(sk, &dst2, &fl2);
365 	if (err)
366 		goto relookup_failed;
367 
368 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
369 	if (!IS_ERR(dst2)) {
370 		dst_release(dst);
371 		dst = dst2;
372 	} else {
373 		err = PTR_ERR(dst2);
374 		if (err == -EPERM) {
375 			dst_release(dst);
376 			return dst2;
377 		} else
378 			goto relookup_failed;
379 	}
380 
381 relookup_failed:
382 	if (dst)
383 		return dst;
384 	return ERR_PTR(err);
385 }
386 
387 /*
388  *	Send an ICMP message in response to a packet in error
389  */
390 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
391 {
392 	struct net *net = dev_net(skb->dev);
393 	struct inet6_dev *idev = NULL;
394 	struct ipv6hdr *hdr = ipv6_hdr(skb);
395 	struct sock *sk;
396 	struct ipv6_pinfo *np;
397 	const struct in6_addr *saddr = NULL;
398 	struct dst_entry *dst;
399 	struct icmp6hdr tmp_hdr;
400 	struct flowi6 fl6;
401 	struct icmpv6_msg msg;
402 	int iif = 0;
403 	int addr_type = 0;
404 	int len;
405 	int hlimit;
406 	int err = 0;
407 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
408 
409 	if ((u8 *)hdr < skb->head ||
410 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
411 		return;
412 
413 	/*
414 	 *	Make sure we respect the rules
415 	 *	i.e. RFC 1885 2.4(e)
416 	 *	Rule (e.1) is enforced by not using icmp6_send
417 	 *	in any code that processes icmp errors.
418 	 */
419 	addr_type = ipv6_addr_type(&hdr->daddr);
420 
421 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
422 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
423 		saddr = &hdr->daddr;
424 
425 	/*
426 	 *	Dest addr check
427 	 */
428 
429 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
430 		if (type != ICMPV6_PKT_TOOBIG &&
431 		    !(type == ICMPV6_PARAMPROB &&
432 		      code == ICMPV6_UNK_OPTION &&
433 		      (opt_unrec(skb, info))))
434 			return;
435 
436 		saddr = NULL;
437 	}
438 
439 	addr_type = ipv6_addr_type(&hdr->saddr);
440 
441 	/*
442 	 *	Source addr check
443 	 */
444 
445 	if (__ipv6_addr_needs_scope_id(addr_type))
446 		iif = skb->dev->ifindex;
447 
448 	/*
449 	 *	Must not send error if the source does not uniquely
450 	 *	identify a single node (RFC2463 Section 2.4).
451 	 *	We check unspecified / multicast addresses here,
452 	 *	and anycast addresses will be checked later.
453 	 */
454 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
455 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
456 		return;
457 	}
458 
459 	/*
460 	 *	Never answer to a ICMP packet.
461 	 */
462 	if (is_ineligible(skb)) {
463 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
464 		return;
465 	}
466 
467 	mip6_addr_swap(skb);
468 
469 	memset(&fl6, 0, sizeof(fl6));
470 	fl6.flowi6_proto = IPPROTO_ICMPV6;
471 	fl6.daddr = hdr->saddr;
472 	if (saddr)
473 		fl6.saddr = *saddr;
474 	fl6.flowi6_mark = mark;
475 	fl6.flowi6_oif = iif;
476 	fl6.fl6_icmp_type = type;
477 	fl6.fl6_icmp_code = code;
478 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
479 
480 	sk = icmpv6_xmit_lock(net);
481 	if (sk == NULL)
482 		return;
483 	sk->sk_mark = mark;
484 	np = inet6_sk(sk);
485 
486 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
487 		goto out;
488 
489 	tmp_hdr.icmp6_type = type;
490 	tmp_hdr.icmp6_code = code;
491 	tmp_hdr.icmp6_cksum = 0;
492 	tmp_hdr.icmp6_pointer = htonl(info);
493 
494 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
495 		fl6.flowi6_oif = np->mcast_oif;
496 	else if (!fl6.flowi6_oif)
497 		fl6.flowi6_oif = np->ucast_oif;
498 
499 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
500 	if (IS_ERR(dst))
501 		goto out;
502 
503 	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
504 
505 	msg.skb = skb;
506 	msg.offset = skb_network_offset(skb);
507 	msg.type = type;
508 
509 	len = skb->len - msg.offset;
510 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
511 	if (len < 0) {
512 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
513 		goto out_dst_release;
514 	}
515 
516 	rcu_read_lock();
517 	idev = __in6_dev_get(skb->dev);
518 
519 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
520 			      len + sizeof(struct icmp6hdr),
521 			      sizeof(struct icmp6hdr), hlimit,
522 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
523 			      MSG_DONTWAIT, np->dontfrag);
524 	if (err) {
525 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
526 		ip6_flush_pending_frames(sk);
527 	} else {
528 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
529 						 len + sizeof(struct icmp6hdr));
530 	}
531 	rcu_read_unlock();
532 out_dst_release:
533 	dst_release(dst);
534 out:
535 	icmpv6_xmit_unlock(sk);
536 }
537 
/* Slightly more convenient version of icmp6_send: sends a Parameter
 * Problem error with the given code, passing pos as the info value,
 * and then frees skb.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
	kfree_skb(skb);
}
545 
546 static void icmpv6_echo_reply(struct sk_buff *skb)
547 {
548 	struct net *net = dev_net(skb->dev);
549 	struct sock *sk;
550 	struct inet6_dev *idev;
551 	struct ipv6_pinfo *np;
552 	const struct in6_addr *saddr = NULL;
553 	struct icmp6hdr *icmph = icmp6_hdr(skb);
554 	struct icmp6hdr tmp_hdr;
555 	struct flowi6 fl6;
556 	struct icmpv6_msg msg;
557 	struct dst_entry *dst;
558 	int err = 0;
559 	int hlimit;
560 	u8 tclass;
561 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
562 
563 	saddr = &ipv6_hdr(skb)->daddr;
564 
565 	if (!ipv6_unicast_destination(skb) &&
566 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
567 	      ipv6_anycast_destination(skb)))
568 		saddr = NULL;
569 
570 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
571 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
572 
573 	memset(&fl6, 0, sizeof(fl6));
574 	fl6.flowi6_proto = IPPROTO_ICMPV6;
575 	fl6.daddr = ipv6_hdr(skb)->saddr;
576 	if (saddr)
577 		fl6.saddr = *saddr;
578 	fl6.flowi6_oif = skb->dev->ifindex;
579 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
580 	fl6.flowi6_mark = mark;
581 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
582 
583 	sk = icmpv6_xmit_lock(net);
584 	if (sk == NULL)
585 		return;
586 	sk->sk_mark = mark;
587 	np = inet6_sk(sk);
588 
589 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
590 		fl6.flowi6_oif = np->mcast_oif;
591 	else if (!fl6.flowi6_oif)
592 		fl6.flowi6_oif = np->ucast_oif;
593 
594 	err = ip6_dst_lookup(sk, &dst, &fl6);
595 	if (err)
596 		goto out;
597 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
598 	if (IS_ERR(dst))
599 		goto out;
600 
601 	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
602 
603 	idev = __in6_dev_get(skb->dev);
604 
605 	msg.skb = skb;
606 	msg.offset = 0;
607 	msg.type = ICMPV6_ECHO_REPLY;
608 
609 	tclass = ipv6_get_dsfield(ipv6_hdr(skb));
610 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
611 				sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
612 				(struct rt6_info *)dst, MSG_DONTWAIT,
613 				np->dontfrag);
614 
615 	if (err) {
616 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
617 		ip6_flush_pending_frames(sk);
618 	} else {
619 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
620 						 skb->len + sizeof(struct icmp6hdr));
621 	}
622 	dst_release(dst);
623 out:
624 	icmpv6_xmit_unlock(sk);
625 }
626 
627 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
628 {
629 	const struct inet6_protocol *ipprot;
630 	int inner_offset;
631 	__be16 frag_off;
632 	u8 nexthdr;
633 	struct net *net = dev_net(skb->dev);
634 
635 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
636 		goto out;
637 
638 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
639 	if (ipv6_ext_hdr(nexthdr)) {
640 		/* now skip over extension headers */
641 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
642 						&nexthdr, &frag_off);
643 		if (inner_offset < 0)
644 			goto out;
645 	} else {
646 		inner_offset = sizeof(struct ipv6hdr);
647 	}
648 
649 	/* Checkin header including 8 bytes of inner protocol header. */
650 	if (!pskb_may_pull(skb, inner_offset+8))
651 		goto out;
652 
653 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
654 	   Without this we will not able f.e. to make source routed
655 	   pmtu discovery.
656 	   Corresponding argument (opt) to notifiers is already added.
657 	   --ANK (980726)
658 	 */
659 
660 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
661 	if (ipprot && ipprot->err_handler)
662 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
663 
664 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
665 	return;
666 
667 out:
668 	ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
669 }
670 
671 /*
672  *	Handle icmp messages
673  */
674 
675 static int icmpv6_rcv(struct sk_buff *skb)
676 {
677 	struct net_device *dev = skb->dev;
678 	struct inet6_dev *idev = __in6_dev_get(dev);
679 	const struct in6_addr *saddr, *daddr;
680 	struct icmp6hdr *hdr;
681 	u8 type;
682 
683 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
684 		struct sec_path *sp = skb_sec_path(skb);
685 		int nh;
686 
687 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
688 				 XFRM_STATE_ICMP))
689 			goto drop_no_count;
690 
691 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
692 			goto drop_no_count;
693 
694 		nh = skb_network_offset(skb);
695 		skb_set_network_header(skb, sizeof(*hdr));
696 
697 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
698 			goto drop_no_count;
699 
700 		skb_set_network_header(skb, nh);
701 	}
702 
703 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
704 
705 	saddr = &ipv6_hdr(skb)->saddr;
706 	daddr = &ipv6_hdr(skb)->daddr;
707 
708 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
709 		LIMIT_NETDEBUG(KERN_DEBUG
710 			       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
711 			       saddr, daddr);
712 		goto csum_error;
713 	}
714 
715 	if (!pskb_pull(skb, sizeof(*hdr)))
716 		goto discard_it;
717 
718 	hdr = icmp6_hdr(skb);
719 
720 	type = hdr->icmp6_type;
721 
722 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
723 
724 	switch (type) {
725 	case ICMPV6_ECHO_REQUEST:
726 		icmpv6_echo_reply(skb);
727 		break;
728 
729 	case ICMPV6_ECHO_REPLY:
730 		ping_rcv(skb);
731 		break;
732 
733 	case ICMPV6_PKT_TOOBIG:
734 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
735 		   standard destination cache. Seems, only "advanced"
736 		   destination cache will allow to solve this problem
737 		   --ANK (980726)
738 		 */
739 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
740 			goto discard_it;
741 		hdr = icmp6_hdr(skb);
742 
743 		/*
744 		 *	Drop through to notify
745 		 */
746 
747 	case ICMPV6_DEST_UNREACH:
748 	case ICMPV6_TIME_EXCEED:
749 	case ICMPV6_PARAMPROB:
750 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
751 		break;
752 
753 	case NDISC_ROUTER_SOLICITATION:
754 	case NDISC_ROUTER_ADVERTISEMENT:
755 	case NDISC_NEIGHBOUR_SOLICITATION:
756 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
757 	case NDISC_REDIRECT:
758 		ndisc_rcv(skb);
759 		break;
760 
761 	case ICMPV6_MGM_QUERY:
762 		igmp6_event_query(skb);
763 		break;
764 
765 	case ICMPV6_MGM_REPORT:
766 		igmp6_event_report(skb);
767 		break;
768 
769 	case ICMPV6_MGM_REDUCTION:
770 	case ICMPV6_NI_QUERY:
771 	case ICMPV6_NI_REPLY:
772 	case ICMPV6_MLD2_REPORT:
773 	case ICMPV6_DHAAD_REQUEST:
774 	case ICMPV6_DHAAD_REPLY:
775 	case ICMPV6_MOBILE_PREFIX_SOL:
776 	case ICMPV6_MOBILE_PREFIX_ADV:
777 		break;
778 
779 	default:
780 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
781 
782 		/* informational */
783 		if (type & ICMPV6_INFOMSG_MASK)
784 			break;
785 
786 		/*
787 		 * error of unknown type.
788 		 * must pass to upper level
789 		 */
790 
791 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
792 	}
793 
794 	kfree_skb(skb);
795 	return 0;
796 
797 csum_error:
798 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
799 discard_it:
800 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
801 drop_no_count:
802 	kfree_skb(skb);
803 	return 0;
804 }
805 
806 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
807 		      u8 type,
808 		      const struct in6_addr *saddr,
809 		      const struct in6_addr *daddr,
810 		      int oif)
811 {
812 	memset(fl6, 0, sizeof(*fl6));
813 	fl6->saddr = *saddr;
814 	fl6->daddr = *daddr;
815 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
816 	fl6->fl6_icmp_type	= type;
817 	fl6->fl6_icmp_code	= 0;
818 	fl6->flowi6_oif		= oif;
819 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
820 }
821 
822 /*
823  * Special lock-class for __icmpv6_sk:
824  */
825 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
826 
827 static int __net_init icmpv6_sk_init(struct net *net)
828 {
829 	struct sock *sk;
830 	int err, i, j;
831 
832 	net->ipv6.icmp_sk =
833 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
834 	if (net->ipv6.icmp_sk == NULL)
835 		return -ENOMEM;
836 
837 	for_each_possible_cpu(i) {
838 		err = inet_ctl_sock_create(&sk, PF_INET6,
839 					   SOCK_RAW, IPPROTO_ICMPV6, net);
840 		if (err < 0) {
841 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
842 			       err);
843 			goto fail;
844 		}
845 
846 		net->ipv6.icmp_sk[i] = sk;
847 
848 		/*
849 		 * Split off their lock-class, because sk->sk_dst_lock
850 		 * gets used from softirqs, which is safe for
851 		 * __icmpv6_sk (because those never get directly used
852 		 * via userspace syscalls), but unsafe for normal sockets.
853 		 */
854 		lockdep_set_class(&sk->sk_dst_lock,
855 				  &icmpv6_socket_sk_dst_lock_key);
856 
857 		/* Enough space for 2 64K ICMP packets, including
858 		 * sk_buff struct overhead.
859 		 */
860 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
861 	}
862 	return 0;
863 
864  fail:
865 	for (j = 0; j < i; j++)
866 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
867 	kfree(net->ipv6.icmp_sk);
868 	return err;
869 }
870 
871 static void __net_exit icmpv6_sk_exit(struct net *net)
872 {
873 	int i;
874 
875 	for_each_possible_cpu(i) {
876 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
877 	}
878 	kfree(net->ipv6.icmp_sk);
879 }
880 
881 static struct pernet_operations icmpv6_sk_ops = {
882 	.init = icmpv6_sk_init,
883 	.exit = icmpv6_sk_exit,
884 };
885 
886 int __init icmpv6_init(void)
887 {
888 	int err;
889 
890 	err = register_pernet_subsys(&icmpv6_sk_ops);
891 	if (err < 0)
892 		return err;
893 
894 	err = -EAGAIN;
895 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
896 		goto fail;
897 
898 	err = inet6_register_icmp_sender(icmp6_send);
899 	if (err)
900 		goto sender_reg_err;
901 	return 0;
902 
903 sender_reg_err:
904 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
905 fail:
906 	pr_err("Failed to register ICMP6 protocol\n");
907 	unregister_pernet_subsys(&icmpv6_sk_ops);
908 	return err;
909 }
910 
/*
 * Undo icmpv6_init(): unregister the ICMPv6 sender, the per-namespace
 * operations and the protocol handler.
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
917 
918 
/*
 * Translation of Destination Unreachable codes, indexed by code, into
 * the errno to report and whether the error is fatal.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
952 
953 int icmpv6_err_convert(u8 type, u8 code, int *err)
954 {
955 	int fatal = 0;
956 
957 	*err = EPROTO;
958 
959 	switch (type) {
960 	case ICMPV6_DEST_UNREACH:
961 		fatal = 1;
962 		if (code < ARRAY_SIZE(tab_unreach)) {
963 			*err  = tab_unreach[code].err;
964 			fatal = tab_unreach[code].fatal;
965 		}
966 		break;
967 
968 	case ICMPV6_PKT_TOOBIG:
969 		*err = EMSGSIZE;
970 		break;
971 
972 	case ICMPV6_PARAMPROB:
973 		*err = EPROTO;
974 		fatal = 1;
975 		break;
976 
977 	case ICMPV6_TIME_EXCEED:
978 		*err = EHOSTUNREACH;
979 		break;
980 	}
981 
982 	return fatal;
983 }
984 EXPORT_SYMBOL(icmpv6_err_convert);
985 
986 #ifdef CONFIG_SYSCTL
987 static struct ctl_table ipv6_icmp_table_template[] = {
988 	{
989 		.procname	= "ratelimit",
990 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
991 		.maxlen		= sizeof(int),
992 		.mode		= 0644,
993 		.proc_handler	= proc_dointvec_ms_jiffies,
994 	},
995 	{ },
996 };
997 
998 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
999 {
1000 	struct ctl_table *table;
1001 
1002 	table = kmemdup(ipv6_icmp_table_template,
1003 			sizeof(ipv6_icmp_table_template),
1004 			GFP_KERNEL);
1005 
1006 	if (table)
1007 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1008 
1009 	return table;
1010 }
1011 #endif
1012 
1013