xref: /openbmc/linux/net/ipv6/icmp.c (revision e8243534)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen		:	add rate limits. never reply to an icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to send parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 
71 #include <asm/uaccess.h>
72 
73 /*
74  *	The ICMP socket(s). This is the most convenient way to flow control
75  *	our ICMP output as well as maintain a clean interface throughout
76  *	all layers. All Socketless IP sends will soon be gone.
77  *
78  *	On SMP we have one ICMP socket per-cpu.
79  */
80 static inline struct sock *icmpv6_sk(struct net *net)
81 {
82 	return net->ipv6.icmp_sk[smp_processor_id()];
83 }
84 
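/*
 * Error handler registered for IPPROTO_ICMPV6 itself: called when an
 * ICMPv6 error arrives in response to an ICMPv6 packet we sent.
 * PKT_TOOBIG updates the cached path MTU, NDISC_REDIRECT updates the
 * route, and errors for embedded echo requests are handed to ping_err().
 */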
85 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
86 		       u8 type, u8 code, int offset, __be32 info)
87 {
88 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
89 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
90 	struct net *net = dev_net(skb->dev);
91 
92 	if (type == ICMPV6_PKT_TOOBIG)
93 		ip6_update_pmtu(skb, net, info, 0, 0);
94 	else if (type == NDISC_REDIRECT)
95 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
96 
97 	if (!(type & ICMPV6_INFOMSG_MASK))
98 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
99 			ping_err(skb, offset, info);
100 }
101 
102 static int icmpv6_rcv(struct sk_buff *skb);
103 
104 static const struct inet6_protocol icmpv6_protocol = {
105 	.handler	=	icmpv6_rcv,
106 	.err_handler	=	icmpv6_err,
107 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
108 };
109 
110 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
111 {
112 	struct sock *sk;
113 
114 	local_bh_disable();
115 
116 	sk = icmpv6_sk(net);
117 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118 		/* This can happen if the output path (e.g. SIT or
119 		 * ip6ip6 tunnel) signals dst_link_failure() for an
120 		 * outgoing ICMP6 packet.
121 		 */
122 		local_bh_enable();
123 		return NULL;
124 	}
125 	return sk;
126 }
127 
128 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
129 {
130 	spin_unlock_bh(&sk->sk_lock.slock);
131 }
132 
133 /*
134  * Figure out whether we may reply to this packet with an icmp error.
135  *
136  * We do not reply if:
137  *	- it was an icmp error message.
138  *	- it is truncated, so that we cannot tell whether the protocol
139  *	  is ICMPV6 (i.e. it ends in the middle of some exthdr)
140  *
141  *	--ANK (980726)
142  */
143 
144 static bool is_ineligible(const struct sk_buff *skb)
145 {
146 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
147 	int len = skb->len - ptr;
148 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
149 	__be16 frag_off;
150 
151 	if (len < 0)
152 		return true;
153 
154 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
155 	if (ptr < 0)
156 		return false;
157 	if (nexthdr == IPPROTO_ICMPV6) {
158 		u8 _type, *tp;
159 		tp = skb_header_pointer(skb,
160 			ptr + offsetof(struct icmp6hdr, icmp6_type),
161 			sizeof(_type), &_type);
162 		if (tp == NULL ||
163 		    !(*tp & ICMPV6_INFOMSG_MASK))
164 			return true;
165 	}
166 	return false;
167 }
168 
169 /*
170  * Check the ICMP output rate limit
171  */
172 static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
173 				      struct flowi6 *fl6)
174 {
175 	struct dst_entry *dst;
176 	struct net *net = sock_net(sk);
177 	bool res = false;
178 
179 	/* Informational messages are not limited. */
180 	if (type & ICMPV6_INFOMSG_MASK)
181 		return true;
182 
183 	/* Do not limit pmtu discovery, it would break it. */
184 	if (type == ICMPV6_PKT_TOOBIG)
185 		return true;
186 
187 	/*
188 	 * Look up the output route.
189 	 * XXX: perhaps the expire for routing entries cloned by
190 	 * this lookup should be more aggressive (not longer than timeout).
191 	 */
192 	dst = ip6_route_output(net, sk, fl6);
193 	if (dst->error) {
194 		IP6_INC_STATS(net, ip6_dst_idev(dst),
195 			      IPSTATS_MIB_OUTNOROUTES);
196 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
197 		res = true;
198 	} else {
199 		struct rt6_info *rt = (struct rt6_info *)dst;
200 		int tmo = net->ipv6.sysctl.icmpv6_time;
201 		struct inet_peer *peer;
202 
203 		/* Give more bandwidth to wider prefixes. */
204 		if (rt->rt6i_dst.plen < 128)
205 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
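		/* e.g. for a /64 destination: tmo >>= (128 - 64) >> 5 == 2,
		 * i.e. one quarter of the configured icmpv6_time.
		 */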
206 
207 		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
208 		res = inet_peer_xrlim_allow(peer, tmo);
209 		if (peer)
210 			inet_putpeer(peer);
211 	}
212 	dst_release(dst);
213 	return res;
214 }
215 
216 /*
217  *	an inline helper for the "simple" if statement below
218  *	checks whether a parameter problem report is caused by an
219  *	unrecognized IPv6 option whose Option Type has its
220  *	highest-order two bits set to 10
221  */
222 
223 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
224 {
225 	u8 _optval, *op;
226 
227 	offset += skb_network_offset(skb);
228 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
229 	if (op == NULL)
230 		return true;
231 	return (*op & 0xC0) == 0x80;
232 }
233 
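/*
 * Finish an ICMPv6 message built with ip6_append_data(): copy the
 * prepared header into the first queued skb, checksum the header and
 * all queued data together with the IPv6 pseudo-header, and transmit
 * the pending frames.
 */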
234 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
235 			       struct icmp6hdr *thdr, int len)
236 {
237 	struct sk_buff *skb;
238 	struct icmp6hdr *icmp6h;
239 	int err = 0;
240 
241 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
242 		goto out;
243 
244 	icmp6h = icmp6_hdr(skb);
245 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
246 	icmp6h->icmp6_cksum = 0;
247 
248 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
249 		skb->csum = csum_partial(icmp6h,
250 					sizeof(struct icmp6hdr), skb->csum);
251 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
252 						      &fl6->daddr,
253 						      len, fl6->flowi6_proto,
254 						      skb->csum);
255 	} else {
256 		__wsum tmp_csum = 0;
257 
258 		skb_queue_walk(&sk->sk_write_queue, skb) {
259 			tmp_csum = csum_add(tmp_csum, skb->csum);
260 		}
261 
262 		tmp_csum = csum_partial(icmp6h,
263 					sizeof(struct icmp6hdr), tmp_csum);
264 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
265 						      &fl6->daddr,
266 						      len, fl6->flowi6_proto,
267 						      tmp_csum);
268 	}
269 	ip6_push_pending_frames(sk);
270 out:
271 	return err;
272 }
273 
274 struct icmpv6_msg {
275 	struct sk_buff	*skb;
276 	int		offset;
277 	uint8_t		type;
278 };
279 
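/*
 * getfrag callback for ip6_append_data(): copies a chunk of the
 * offending packet into the outgoing skb while folding it into the
 * checksum, and attaches the conntrack entry for error messages.
 */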
280 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
281 {
282 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
283 	struct sk_buff *org_skb = msg->skb;
284 	__wsum csum = 0;
285 
286 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
287 				      to, len, csum);
288 	skb->csum = csum_block_add(skb->csum, csum, odd);
289 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
290 		nf_ct_attach(skb, org_skb);
291 	return 0;
292 }
293 
294 #if IS_ENABLED(CONFIG_IPV6_MIP6)
295 static void mip6_addr_swap(struct sk_buff *skb)
296 {
297 	struct ipv6hdr *iph = ipv6_hdr(skb);
298 	struct inet6_skb_parm *opt = IP6CB(skb);
299 	struct ipv6_destopt_hao *hao;
300 	struct in6_addr tmp;
301 	int off;
302 
303 	if (opt->dsthao) {
304 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
305 		if (likely(off >= 0)) {
306 			hao = (struct ipv6_destopt_hao *)
307 					(skb_network_header(skb) + off);
308 			tmp = iph->saddr;
309 			iph->saddr = hao->addr;
310 			hao->addr = tmp;
311 		}
312 	}
313 }
314 #else
315 static inline void mip6_addr_swap(struct sk_buff *skb) {}
316 #endif
317 
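/*
 * Route lookup for an outgoing ICMPv6 error.  We refuse to reply to
 * an anycast source, and if the normal xfrm lookup is denied by
 * policy (-EPERM) we retry with the flow decoded in reverse from the
 * offending packet (XFRM_LOOKUP_ICMP).
 */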
318 static struct dst_entry *icmpv6_route_lookup(struct net *net,
319 					     struct sk_buff *skb,
320 					     struct sock *sk,
321 					     struct flowi6 *fl6)
322 {
323 	struct dst_entry *dst, *dst2;
324 	struct flowi6 fl2;
325 	int err;
326 
327 	err = ip6_dst_lookup(sk, &dst, fl6);
328 	if (err)
329 		return ERR_PTR(err);
330 
331 	/*
332 	 * We won't send icmp if the destination is known
333 	 * to be anycast.
334 	 */
335 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
336 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
337 		dst_release(dst);
338 		return ERR_PTR(-EINVAL);
339 	}
340 
341 	/* No need to clone since we're just using its address. */
342 	dst2 = dst;
343 
344 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
345 	if (!IS_ERR(dst)) {
346 		if (dst != dst2)
347 			return dst;
348 	} else {
349 		if (PTR_ERR(dst) == -EPERM)
350 			dst = NULL;
351 		else
352 			return dst;
353 	}
354 
355 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
356 	if (err)
357 		goto relookup_failed;
358 
359 	err = ip6_dst_lookup(sk, &dst2, &fl2);
360 	if (err)
361 		goto relookup_failed;
362 
363 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
364 	if (!IS_ERR(dst2)) {
365 		dst_release(dst);
366 		dst = dst2;
367 	} else {
368 		err = PTR_ERR(dst2);
369 		if (err == -EPERM) {
370 			dst_release(dst);
371 			return dst2;
372 		} else
373 			goto relookup_failed;
374 	}
375 
376 relookup_failed:
377 	if (dst)
378 		return dst;
379 	return ERR_PTR(err);
380 }
381 
382 /*
383  *	Send an ICMP message in response to a packet in error
384  */
385 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
386 {
387 	struct net *net = dev_net(skb->dev);
388 	struct inet6_dev *idev = NULL;
389 	struct ipv6hdr *hdr = ipv6_hdr(skb);
390 	struct sock *sk;
391 	struct ipv6_pinfo *np;
392 	const struct in6_addr *saddr = NULL;
393 	struct dst_entry *dst;
394 	struct icmp6hdr tmp_hdr;
395 	struct flowi6 fl6;
396 	struct icmpv6_msg msg;
397 	int iif = 0;
398 	int addr_type = 0;
399 	int len;
400 	int hlimit;
401 	int err = 0;
402 
403 	if ((u8 *)hdr < skb->head ||
404 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
405 		return;
406 
407 	/*
408 	 *	Make sure we respect the rules
409 	 *	i.e. RFC 1885 2.4(e)
410 	 *	Rule (e.1) is enforced by not using icmp6_send
411 	 *	in any code that processes icmp errors.
412 	 */
413 	addr_type = ipv6_addr_type(&hdr->daddr);
414 
415 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
416 		saddr = &hdr->daddr;
417 
418 	/*
419 	 *	Dest addr check
420 	 */
421 
422 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
423 		if (type != ICMPV6_PKT_TOOBIG &&
424 		    !(type == ICMPV6_PARAMPROB &&
425 		      code == ICMPV6_UNK_OPTION &&
426 		      (opt_unrec(skb, info))))
427 			return;
428 
429 		saddr = NULL;
430 	}
431 
432 	addr_type = ipv6_addr_type(&hdr->saddr);
433 
434 	/*
435 	 *	Source addr check
436 	 */
437 
438 	if (__ipv6_addr_needs_scope_id(addr_type))
439 		iif = skb->dev->ifindex;
440 
441 	/*
442 	 *	Must not send error if the source does not uniquely
443 	 *	identify a single node (RFC2463 Section 2.4).
444 	 *	We check unspecified / multicast addresses here,
445 	 *	and anycast addresses will be checked later.
446 	 */
447 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
448 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
449 		return;
450 	}
451 
452 	/*
453 	 *	Never answer to an ICMP packet.
454 	 */
455 	if (is_ineligible(skb)) {
456 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
457 		return;
458 	}
459 
460 	mip6_addr_swap(skb);
461 
462 	memset(&fl6, 0, sizeof(fl6));
463 	fl6.flowi6_proto = IPPROTO_ICMPV6;
464 	fl6.daddr = hdr->saddr;
465 	if (saddr)
466 		fl6.saddr = *saddr;
467 	fl6.flowi6_oif = iif;
468 	fl6.fl6_icmp_type = type;
469 	fl6.fl6_icmp_code = code;
470 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
471 
472 	sk = icmpv6_xmit_lock(net);
473 	if (sk == NULL)
474 		return;
475 	np = inet6_sk(sk);
476 
477 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
478 		goto out;
479 
480 	tmp_hdr.icmp6_type = type;
481 	tmp_hdr.icmp6_code = code;
482 	tmp_hdr.icmp6_cksum = 0;
483 	tmp_hdr.icmp6_pointer = htonl(info);
484 
485 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
486 		fl6.flowi6_oif = np->mcast_oif;
487 	else if (!fl6.flowi6_oif)
488 		fl6.flowi6_oif = np->ucast_oif;
489 
490 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
491 	if (IS_ERR(dst))
492 		goto out;
493 
494 	if (ipv6_addr_is_multicast(&fl6.daddr))
495 		hlimit = np->mcast_hops;
496 	else
497 		hlimit = np->hop_limit;
498 	if (hlimit < 0)
499 		hlimit = ip6_dst_hoplimit(dst);
500 
501 	msg.skb = skb;
502 	msg.offset = skb_network_offset(skb);
503 	msg.type = type;
504 
505 	len = skb->len - msg.offset;
506 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
507 	if (len < 0) {
508 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
509 		goto out_dst_release;
510 	}
511 
512 	rcu_read_lock();
513 	idev = __in6_dev_get(skb->dev);
514 
515 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
516 			      len + sizeof(struct icmp6hdr),
517 			      sizeof(struct icmp6hdr), hlimit,
518 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
519 			      MSG_DONTWAIT, np->dontfrag);
520 	if (err) {
521 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
522 		ip6_flush_pending_frames(sk);
523 	} else {
524 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
525 						 len + sizeof(struct icmp6hdr));
526 	}
527 	rcu_read_unlock();
528 out_dst_release:
529 	dst_release(dst);
530 out:
531 	icmpv6_xmit_unlock(sk);
532 }
533 
534 /* Slightly more convenient version of icmp6_send.
535  */
536 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
537 {
538 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
539 	kfree_skb(skb);
540 }
541 
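/*
 * Build and send an Echo Reply for a received Echo Request, copying
 * the request's identifier, sequence number and payload and changing
 * the type to ICMPV6_ECHO_REPLY.
 */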
542 static void icmpv6_echo_reply(struct sk_buff *skb)
543 {
544 	struct net *net = dev_net(skb->dev);
545 	struct sock *sk;
546 	struct inet6_dev *idev;
547 	struct ipv6_pinfo *np;
548 	const struct in6_addr *saddr = NULL;
549 	struct icmp6hdr *icmph = icmp6_hdr(skb);
550 	struct icmp6hdr tmp_hdr;
551 	struct flowi6 fl6;
552 	struct icmpv6_msg msg;
553 	struct dst_entry *dst;
554 	int err = 0;
555 	int hlimit;
556 
557 	saddr = &ipv6_hdr(skb)->daddr;
558 
559 	if (!ipv6_unicast_destination(skb))
560 		saddr = NULL;
561 
562 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
563 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
564 
565 	memset(&fl6, 0, sizeof(fl6));
566 	fl6.flowi6_proto = IPPROTO_ICMPV6;
567 	fl6.daddr = ipv6_hdr(skb)->saddr;
568 	if (saddr)
569 		fl6.saddr = *saddr;
570 	fl6.flowi6_oif = skb->dev->ifindex;
571 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
572 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
573 
574 	sk = icmpv6_xmit_lock(net);
575 	if (sk == NULL)
576 		return;
577 	np = inet6_sk(sk);
578 
579 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
580 		fl6.flowi6_oif = np->mcast_oif;
581 	else if (!fl6.flowi6_oif)
582 		fl6.flowi6_oif = np->ucast_oif;
583 
584 	err = ip6_dst_lookup(sk, &dst, &fl6);
585 	if (err)
586 		goto out;
587 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
588 	if (IS_ERR(dst))
589 		goto out;
590 
591 	if (ipv6_addr_is_multicast(&fl6.daddr))
592 		hlimit = np->mcast_hops;
593 	else
594 		hlimit = np->hop_limit;
595 	if (hlimit < 0)
596 		hlimit = ip6_dst_hoplimit(dst);
597 
598 	idev = __in6_dev_get(skb->dev);
599 
600 	msg.skb = skb;
601 	msg.offset = 0;
602 	msg.type = ICMPV6_ECHO_REPLY;
603 
604 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
605 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
606 				(struct rt6_info *)dst, MSG_DONTWAIT,
607 				np->dontfrag);
608 
609 	if (err) {
610 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
611 		ip6_flush_pending_frames(sk);
612 	} else {
613 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
614 						 skb->len + sizeof(struct icmp6hdr));
615 	}
616 	dst_release(dst);
617 out:
618 	icmpv6_xmit_unlock(sk);
619 }
620 
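/*
 * Deliver a received ICMPv6 error to the protocol that generated the
 * offending packet: skip any extension headers to find the inner
 * protocol, call its err_handler, and notify matching raw sockets.
 */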
621 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
622 {
623 	const struct inet6_protocol *ipprot;
624 	int inner_offset;
625 	__be16 frag_off;
626 	u8 nexthdr;
627 
628 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
629 		return;
630 
631 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
632 	if (ipv6_ext_hdr(nexthdr)) {
633 		/* now skip over extension headers */
634 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
635 						&nexthdr, &frag_off);
636 		if (inner_offset < 0)
637 			return;
638 	} else {
639 		inner_offset = sizeof(struct ipv6hdr);
640 	}
641 
642 	/* Check the header, including 8 bytes of the inner protocol header. */
643 	if (!pskb_may_pull(skb, inner_offset+8))
644 		return;
645 
646 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
647 	   Without this we will not be able, e.g., to do source routed
648 	   pmtu discovery.
649 	   Corresponding argument (opt) to notifiers is already added.
650 	   --ANK (980726)
651 	 */
652 
653 	rcu_read_lock();
654 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
655 	if (ipprot && ipprot->err_handler)
656 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
657 	rcu_read_unlock();
658 
659 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
660 }
661 
662 /*
663  *	Handle icmp messages
664  */
665 
666 static int icmpv6_rcv(struct sk_buff *skb)
667 {
668 	struct net_device *dev = skb->dev;
669 	struct inet6_dev *idev = __in6_dev_get(dev);
670 	const struct in6_addr *saddr, *daddr;
671 	struct icmp6hdr *hdr;
672 	u8 type;
673 
674 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
675 		struct sec_path *sp = skb_sec_path(skb);
676 		int nh;
677 
678 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
679 				 XFRM_STATE_ICMP))
680 			goto drop_no_count;
681 
682 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
683 			goto drop_no_count;
684 
685 		nh = skb_network_offset(skb);
686 		skb_set_network_header(skb, sizeof(*hdr));
687 
688 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
689 			goto drop_no_count;
690 
691 		skb_set_network_header(skb, nh);
692 	}
693 
694 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
695 
696 	saddr = &ipv6_hdr(skb)->saddr;
697 	daddr = &ipv6_hdr(skb)->daddr;
698 
699 	/* Perform checksum. */
700 	switch (skb->ip_summed) {
701 	case CHECKSUM_COMPLETE:
702 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
703 				     skb->csum))
704 			break;
705 		/* fall through */
706 	case CHECKSUM_NONE:
707 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
708 					     IPPROTO_ICMPV6, 0));
709 		if (__skb_checksum_complete(skb)) {
710 			LIMIT_NETDEBUG(KERN_DEBUG
711 				       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
712 				       saddr, daddr);
713 			goto csum_error;
714 		}
715 	}
716 
717 	if (!pskb_pull(skb, sizeof(*hdr)))
718 		goto discard_it;
719 
720 	hdr = icmp6_hdr(skb);
721 
722 	type = hdr->icmp6_type;
723 
724 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
725 
726 	switch (type) {
727 	case ICMPV6_ECHO_REQUEST:
728 		icmpv6_echo_reply(skb);
729 		break;
730 
731 	case ICMPV6_ECHO_REPLY:
732 		ping_rcv(skb);
733 		break;
734 
735 	case ICMPV6_PKT_TOOBIG:
736 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
737 		   the standard destination cache. It seems only an "advanced"
738 		   destination cache would allow us to solve this problem
739 		   --ANK (980726)
740 		 */
741 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
742 			goto discard_it;
743 		hdr = icmp6_hdr(skb);
744 
745 		/*
746 		 *	Drop through to notify
747 		 */
748 
749 	case ICMPV6_DEST_UNREACH:
750 	case ICMPV6_TIME_EXCEED:
751 	case ICMPV6_PARAMPROB:
752 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
753 		break;
754 
755 	case NDISC_ROUTER_SOLICITATION:
756 	case NDISC_ROUTER_ADVERTISEMENT:
757 	case NDISC_NEIGHBOUR_SOLICITATION:
758 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
759 	case NDISC_REDIRECT:
760 		ndisc_rcv(skb);
761 		break;
762 
763 	case ICMPV6_MGM_QUERY:
764 		igmp6_event_query(skb);
765 		break;
766 
767 	case ICMPV6_MGM_REPORT:
768 		igmp6_event_report(skb);
769 		break;
770 
771 	case ICMPV6_MGM_REDUCTION:
772 	case ICMPV6_NI_QUERY:
773 	case ICMPV6_NI_REPLY:
774 	case ICMPV6_MLD2_REPORT:
775 	case ICMPV6_DHAAD_REQUEST:
776 	case ICMPV6_DHAAD_REPLY:
777 	case ICMPV6_MOBILE_PREFIX_SOL:
778 	case ICMPV6_MOBILE_PREFIX_ADV:
779 		break;
780 
781 	default:
782 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
783 
784 		/* informational */
785 		if (type & ICMPV6_INFOMSG_MASK)
786 			break;
787 
788 		/*
789 		 * error of unknown type.
790 		 * must pass to upper level
791 		 */
792 
793 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
794 	}
795 
796 	kfree_skb(skb);
797 	return 0;
798 
799 csum_error:
800 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
801 discard_it:
802 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
803 drop_no_count:
804 	kfree_skb(skb);
805 	return 0;
806 }
807 
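/*
 * Fill in a flowi6 for an ICMPv6 message with the given type,
 * addresses and outgoing interface, and classify it for security.
 */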
808 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
809 		      u8 type,
810 		      const struct in6_addr *saddr,
811 		      const struct in6_addr *daddr,
812 		      int oif)
813 {
814 	memset(fl6, 0, sizeof(*fl6));
815 	fl6->saddr = *saddr;
816 	fl6->daddr = *daddr;
817 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
818 	fl6->fl6_icmp_type	= type;
819 	fl6->fl6_icmp_code	= 0;
820 	fl6->flowi6_oif		= oif;
821 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
822 }
823 
824 /*
825  * Special lock-class for __icmpv6_sk:
826  */
827 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
828 
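/*
 * Per-namespace setup: create one ICMPv6 control socket per possible
 * CPU, each with enough send buffer for two 64K ICMP packets.
 */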
829 static int __net_init icmpv6_sk_init(struct net *net)
830 {
831 	struct sock *sk;
832 	int err, i, j;
833 
834 	net->ipv6.icmp_sk =
835 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
836 	if (net->ipv6.icmp_sk == NULL)
837 		return -ENOMEM;
838 
839 	for_each_possible_cpu(i) {
840 		err = inet_ctl_sock_create(&sk, PF_INET6,
841 					   SOCK_RAW, IPPROTO_ICMPV6, net);
842 		if (err < 0) {
843 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
844 			       err);
845 			goto fail;
846 		}
847 
848 		net->ipv6.icmp_sk[i] = sk;
849 
850 		/*
851 		 * Split off their lock-class, because sk->sk_dst_lock
852 		 * gets used from softirqs, which is safe for
853 		 * __icmpv6_sk (because those never get directly used
854 		 * via userspace syscalls), but unsafe for normal sockets.
855 		 */
856 		lockdep_set_class(&sk->sk_dst_lock,
857 				  &icmpv6_socket_sk_dst_lock_key);
858 
859 		/* Enough space for 2 64K ICMP packets, including
860 		 * sk_buff struct overhead.
861 		 */
862 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
863 	}
864 	return 0;
865 
866  fail:
867 	for (j = 0; j < i; j++)
868 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
869 	kfree(net->ipv6.icmp_sk);
870 	return err;
871 }
872 
873 static void __net_exit icmpv6_sk_exit(struct net *net)
874 {
875 	int i;
876 
877 	for_each_possible_cpu(i) {
878 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
879 	}
880 	kfree(net->ipv6.icmp_sk);
881 }
882 
883 static struct pernet_operations icmpv6_sk_ops = {
884        .init = icmpv6_sk_init,
885        .exit = icmpv6_sk_exit,
886 };
887 
888 int __init icmpv6_init(void)
889 {
890 	int err;
891 
892 	err = register_pernet_subsys(&icmpv6_sk_ops);
893 	if (err < 0)
894 		return err;
895 
896 	err = -EAGAIN;
897 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
898 		goto fail;
899 
900 	err = inet6_register_icmp_sender(icmp6_send);
901 	if (err)
902 		goto sender_reg_err;
903 	return 0;
904 
905 sender_reg_err:
906 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
907 fail:
908 	pr_err("Failed to register ICMP6 protocol\n");
909 	unregister_pernet_subsys(&icmpv6_sk_ops);
910 	return err;
911 }
912 
913 void icmpv6_cleanup(void)
914 {
915 	inet6_unregister_icmp_sender(icmp6_send);
916 	unregister_pernet_subsys(&icmpv6_sk_ops);
917 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
918 }
919 
920 
921 static const struct icmp6_err {
922 	int err;
923 	int fatal;
924 } tab_unreach[] = {
925 	{	/* NOROUTE */
926 		.err	= ENETUNREACH,
927 		.fatal	= 0,
928 	},
929 	{	/* ADM_PROHIBITED */
930 		.err	= EACCES,
931 		.fatal	= 1,
932 	},
933 	{	/* Was NOT_NEIGHBOUR, now reserved */
934 		.err	= EHOSTUNREACH,
935 		.fatal	= 0,
936 	},
937 	{	/* ADDR_UNREACH	*/
938 		.err	= EHOSTUNREACH,
939 		.fatal	= 0,
940 	},
941 	{	/* PORT_UNREACH	*/
942 		.err	= ECONNREFUSED,
943 		.fatal	= 1,
944 	},
945 	{	/* POLICY_FAIL */
946 		.err	= EACCES,
947 		.fatal	= 1,
948 	},
949 	{	/* REJECT_ROUTE	*/
950 		.err	= EACCES,
951 		.fatal	= 1,
952 	},
953 };
954 
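/*
 * Map an ICMPv6 error type/code to an errno value for the upper
 * layers.  The return value is non-zero when the error is fatal for
 * the socket (e.g. port unreachable or administratively prohibited).
 */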
955 int icmpv6_err_convert(u8 type, u8 code, int *err)
956 {
957 	int fatal = 0;
958 
959 	*err = EPROTO;
960 
961 	switch (type) {
962 	case ICMPV6_DEST_UNREACH:
963 		fatal = 1;
964 		if (code < ARRAY_SIZE(tab_unreach)) {
965 			*err  = tab_unreach[code].err;
966 			fatal = tab_unreach[code].fatal;
967 		}
968 		break;
969 
970 	case ICMPV6_PKT_TOOBIG:
971 		*err = EMSGSIZE;
972 		break;
973 
974 	case ICMPV6_PARAMPROB:
975 		*err = EPROTO;
976 		fatal = 1;
977 		break;
978 
979 	case ICMPV6_TIME_EXCEED:
980 		*err = EHOSTUNREACH;
981 		break;
982 	}
983 
984 	return fatal;
985 }
986 EXPORT_SYMBOL(icmpv6_err_convert);
987 
988 #ifdef CONFIG_SYSCTL
989 static struct ctl_table ipv6_icmp_table_template[] = {
990 	{
991 		.procname	= "ratelimit",
992 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
993 		.maxlen		= sizeof(int),
994 		.mode		= 0644,
995 		.proc_handler	= proc_dointvec_ms_jiffies,
996 	},
997 	{ },
998 };
999 
1000 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1001 {
1002 	struct ctl_table *table;
1003 
1004 	table = kmemdup(ipv6_icmp_table_template,
1005 			sizeof(ipv6_icmp_table_template),
1006 			GFP_KERNEL);
1007 
1008 	if (table)
1009 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1010 
1011 	return table;
1012 }
1013 #endif
1014 
1015