xref: /openbmc/linux/net/ipv6/icmp.c (revision 3a9a231d)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to an icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to send parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #include <linux/module.h>
33 #include <linux/errno.h>
34 #include <linux/types.h>
35 #include <linux/socket.h>
36 #include <linux/in.h>
37 #include <linux/kernel.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/skbuff.h>
41 #include <linux/init.h>
42 #include <linux/netfilter.h>
43 #include <linux/slab.h>
44 
45 #ifdef CONFIG_SYSCTL
46 #include <linux/sysctl.h>
47 #endif
48 
49 #include <linux/inet.h>
50 #include <linux/netdevice.h>
51 #include <linux/icmpv6.h>
52 
53 #include <net/ip.h>
54 #include <net/sock.h>
55 
56 #include <net/ipv6.h>
57 #include <net/ip6_checksum.h>
58 #include <net/protocol.h>
59 #include <net/raw.h>
60 #include <net/rawv6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
70 
71 /*
72  *	The ICMP socket(s). This is the most convenient way to flow control
73  *	our ICMP output as well as maintain a clean interface throughout
74  *	all layers. All Socketless IP sends will soon be gone.
75  *
76  *	On SMP we have one ICMP socket per-cpu.
77  */
78 static inline struct sock *icmpv6_sk(struct net *net)
79 {
80 	return net->ipv6.icmp_sk[smp_processor_id()];
81 }
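
/*
 * icmpv6_sk() just picks the current CPU's slot from the per-netns array
 * that icmpv6_sk_init() fills in below; callers need preemption (or BHs)
 * disabled so the CPU cannot change underneath them, which is what
 * icmpv6_xmit_lock() arranges.
 */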
82 
83 static int icmpv6_rcv(struct sk_buff *skb);
84 
85 static const struct inet6_protocol icmpv6_protocol = {
86 	.handler	=	icmpv6_rcv,
87 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
88 };
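
/*
 * Roughly: INET6_PROTO_NOPOLICY tells the generic input path not to run
 * the xfrm policy check before handing packets to icmpv6_rcv() (which
 * does its own, ICMP-aware check), and INET6_PROTO_FINAL marks ICMPv6 as
 * a final protocol rather than a further extension header.
 */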
89 
90 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
91 {
92 	struct sock *sk;
93 
94 	local_bh_disable();
95 
96 	sk = icmpv6_sk(net);
97 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
98 		/* This can happen if the output path (e.g. SIT or
99 		 * ip6ip6 tunnel) signals dst_link_failure() for an
100 		 * outgoing ICMP6 packet.
101 		 */
102 		local_bh_enable();
103 		return NULL;
104 	}
105 	return sk;
106 }
107 
108 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
109 {
110 	spin_unlock_bh(&sk->sk_lock.slock);
111 }
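
/*
 * These two helpers serialize use of the per-cpu ICMPv6 socket:
 * icmpv6_xmit_lock() disables BHs and only *try*locks the socket, so a
 * re-entrant error (see the comment above) fails to get the socket
 * instead of deadlocking, and icmpv6_xmit_unlock() drops the lock and
 * re-enables BHs in one go.
 */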
112 
113 /*
114  * Slightly more convenient version of icmpv6_send.
115  */
116 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
117 {
118 	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
119 	kfree_skb(skb);
120 }
121 
122 /*
123  * Figure out whether we may reply to this packet with an icmp error.
124  *
125  * We do not reply if:
126  *	- it was an icmp error message.
127  *	- it is truncated, so that it is known that the protocol is ICMPV6
128  *	  (i.e. in the middle of some exthdr)
129  *
130  *	--ANK (980726)
131  */
132 
133 static int is_ineligible(struct sk_buff *skb)
134 {
135 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
136 	int len = skb->len - ptr;
137 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
138 
139 	if (len < 0)
140 		return 1;
141 
142 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
143 	if (ptr < 0)
144 		return 0;
145 	if (nexthdr == IPPROTO_ICMPV6) {
146 		u8 _type, *tp;
147 		tp = skb_header_pointer(skb,
148 			ptr+offsetof(struct icmp6hdr, icmp6_type),
149 			sizeof(_type), &_type);
150 		if (tp == NULL ||
151 		    !(*tp & ICMPV6_INFOMSG_MASK))
152 			return 1;
153 	}
154 	return 0;
155 }
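
/*
 * is_ineligible() returns nonzero when the offending packet must not be
 * answered: either it is too short to hold the headers we need to look
 * at, or it is itself an ICMPv6 message whose type is outside the
 * informational range (i.e. another error).
 */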
156 
157 /*
158  * Check the ICMP output rate limit
159  */
160 static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
161 				      struct flowi6 *fl6)
162 {
163 	struct dst_entry *dst;
164 	struct net *net = sock_net(sk);
165 	bool res = false;
166 
167 	/* Informational messages are not limited. */
168 	if (type & ICMPV6_INFOMSG_MASK)
169 		return true;
170 
171 	/* Do not limit pmtu discovery; it would break it. */
172 	if (type == ICMPV6_PKT_TOOBIG)
173 		return true;
174 
175 	/*
176 	 * Look up the output route.
177 	 * XXX: perhaps the expire for routing entries cloned by
178 	 * this lookup should be more aggressive (not longer than timeout).
179 	 */
180 	dst = ip6_route_output(net, sk, fl6);
181 	if (dst->error) {
182 		IP6_INC_STATS(net, ip6_dst_idev(dst),
183 			      IPSTATS_MIB_OUTNOROUTES);
184 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
185 		res = true;
186 	} else {
187 		struct rt6_info *rt = (struct rt6_info *)dst;
188 		int tmo = net->ipv6.sysctl.icmpv6_time;
189 
190 		/* Give more bandwidth to wider prefixes. */
191 		if (rt->rt6i_dst.plen < 128)
192 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
193 
194 		if (!rt->rt6i_peer)
195 			rt6_bind_peer(rt, 1);
196 		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
197 	}
198 	dst_release(dst);
199 	return res;
200 }
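
/*
 * The prefix scaling above halves the base interval (the icmpv6_time
 * sysctl) for every full 32 bits the matching route's prefix is shorter
 * than /128: e.g. a /96 gets tmo/2, a /64 tmo/4 and a /32 tmo/8, so
 * destinations behind wider prefixes may be sent errors more often.
 * inet_peer_xrlim_allow() then enforces a token-bucket style limit per
 * destination peer using that interval.
 */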
201 
202 /*
203  *	an inline helper for the "simple" if statement below;
204  *	checks whether a parameter problem report is caused by an
205  *	unrecognized IPv6 option whose Option Type has its
206  *	highest-order two bits set to 10
207  */
208 
209 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
210 {
211 	u8 _optval, *op;
212 
213 	offset += skb_network_offset(skb);
214 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
215 	if (op == NULL)
216 		return 1;
217 	return (*op & 0xC0) == 0x80;
218 }
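
/*
 * The 0xC0/0x80 test keys on the two high-order bits of the IPv6 Option
 * Type: the value 10 means "discard and send an ICMP Parameter Problem
 * even if the destination was a multicast address" (RFC 2460, 4.2),
 * which is why icmpv6_send() consults this helper before suppressing
 * errors for non-unicast destinations.
 */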
219 
220 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
221 {
222 	struct sk_buff *skb;
223 	struct icmp6hdr *icmp6h;
224 	int err = 0;
225 
226 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
227 		goto out;
228 
229 	icmp6h = icmp6_hdr(skb);
230 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
231 	icmp6h->icmp6_cksum = 0;
232 
233 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
234 		skb->csum = csum_partial(icmp6h,
235 					sizeof(struct icmp6hdr), skb->csum);
236 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
237 						      &fl6->daddr,
238 						      len, fl6->flowi6_proto,
239 						      skb->csum);
240 	} else {
241 		__wsum tmp_csum = 0;
242 
243 		skb_queue_walk(&sk->sk_write_queue, skb) {
244 			tmp_csum = csum_add(tmp_csum, skb->csum);
245 		}
246 
247 		tmp_csum = csum_partial(icmp6h,
248 					sizeof(struct icmp6hdr), tmp_csum);
249 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
250 						      &fl6->daddr,
251 						      len, fl6->flowi6_proto,
252 						      tmp_csum);
253 	}
254 	ip6_push_pending_frames(sk);
255 out:
256 	return err;
257 }
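
/*
 * icmpv6_push_pending_frames() finishes a message built with
 * ip6_append_data(): it copies the prepared ICMPv6 header into the first
 * queued skb, sums the per-skb checksums (directly for a single skb,
 * otherwise by walking the write queue), folds in the IPv6 pseudo-header
 * with csum_ipv6_magic() and only then pushes the frames out.
 */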
258 
259 struct icmpv6_msg {
260 	struct sk_buff	*skb;
261 	int		offset;
262 	uint8_t		type;
263 };
264 
265 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
266 {
267 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
268 	struct sk_buff *org_skb = msg->skb;
269 	__wsum csum = 0;
270 
271 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
272 				      to, len, csum);
273 	skb->csum = csum_block_add(skb->csum, csum, odd);
274 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
275 		nf_ct_attach(skb, org_skb);
276 	return 0;
277 }
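
/*
 * icmpv6_getfrag() is the getfrag callback handed to ip6_append_data():
 * it copies a chunk of the offending packet into the message being built
 * while accumulating the checksum, and for error (non-informational)
 * messages also attaches the original skb's conntrack entry so netfilter
 * can relate the ICMP error to the flow that triggered it.
 */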
278 
279 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
280 static void mip6_addr_swap(struct sk_buff *skb)
281 {
282 	struct ipv6hdr *iph = ipv6_hdr(skb);
283 	struct inet6_skb_parm *opt = IP6CB(skb);
284 	struct ipv6_destopt_hao *hao;
285 	struct in6_addr tmp;
286 	int off;
287 
288 	if (opt->dsthao) {
289 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
290 		if (likely(off >= 0)) {
291 			hao = (struct ipv6_destopt_hao *)
292 					(skb_network_header(skb) + off);
293 			ipv6_addr_copy(&tmp, &iph->saddr);
294 			ipv6_addr_copy(&iph->saddr, &hao->addr);
295 			ipv6_addr_copy(&hao->addr, &tmp);
296 		}
297 	}
298 }
299 #else
300 static inline void mip6_addr_swap(struct sk_buff *skb) {}
301 #endif
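
/*
 * For Mobile IPv6, mip6_addr_swap() swaps the source address back with
 * the address carried in the Home Address destination option, in effect
 * reversing the swap done when the option was parsed, so the error is
 * built against (and routed back to) the addresses as they appeared on
 * the wire.  Without CONFIG_IPV6_MIP6 it is a no-op.
 */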
302 
303 static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
304 					     struct sock *sk, struct flowi6 *fl6)
305 {
306 	struct dst_entry *dst, *dst2;
307 	struct flowi6 fl2;
308 	int err;
309 
310 	err = ip6_dst_lookup(sk, &dst, fl6);
311 	if (err)
312 		return ERR_PTR(err);
313 
314 	/*
315 	 * We won't send an icmp error if the destination is a known
316 	 * anycast address.
317 	 */
318 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
319 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
320 		dst_release(dst);
321 		return ERR_PTR(-EINVAL);
322 	}
323 
324 	/* No need to clone since we're just using its address. */
325 	dst2 = dst;
326 
327 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
328 	if (!IS_ERR(dst)) {
329 		if (dst != dst2)
330 			return dst;
331 	} else {
332 		if (PTR_ERR(dst) == -EPERM)
333 			dst = NULL;
334 		else
335 			return dst;
336 	}
337 
338 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
339 	if (err)
340 		goto relookup_failed;
341 
342 	err = ip6_dst_lookup(sk, &dst2, &fl2);
343 	if (err)
344 		goto relookup_failed;
345 
346 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
347 	if (!IS_ERR(dst2)) {
348 		dst_release(dst);
349 		dst = dst2;
350 	} else {
351 		err = PTR_ERR(dst2);
352 		if (err == -EPERM) {
353 			dst_release(dst);
354 			return dst2;
355 		} else
356 			goto relookup_failed;
357 	}
358 
359 relookup_failed:
360 	if (dst)
361 		return dst;
362 	return ERR_PTR(err);
363 }
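
/*
 * The route for the error is looked up in up to two passes: a plain
 * lookup plus xfrm_lookup() on the reply flow first, and, when that pass
 * selects no transform or is refused with -EPERM, a second lookup keyed
 * on the reverse of the offending packet's decoded flow with
 * XFRM_LOOKUP_ICMP set, so the error can still follow the IPsec state
 * that matched the original traffic.
 */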
364 
365 /*
366  *	Send an ICMP message in response to a packet in error
367  */
368 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
369 {
370 	struct net *net = dev_net(skb->dev);
371 	struct inet6_dev *idev = NULL;
372 	struct ipv6hdr *hdr = ipv6_hdr(skb);
373 	struct sock *sk;
374 	struct ipv6_pinfo *np;
375 	const struct in6_addr *saddr = NULL;
376 	struct dst_entry *dst;
377 	struct icmp6hdr tmp_hdr;
378 	struct flowi6 fl6;
379 	struct icmpv6_msg msg;
380 	int iif = 0;
381 	int addr_type = 0;
382 	int len;
383 	int hlimit;
384 	int err = 0;
385 
386 	if ((u8 *)hdr < skb->head ||
387 	    (skb->network_header + sizeof(*hdr)) > skb->tail)
388 		return;
389 
390 	/*
391 	 *	Make sure we respect the rules
392 	 *	i.e. RFC 1885 2.4(e)
393 	 *	Rule (e.1) is enforced by not using icmpv6_send
394 	 *	in any code that processes icmp errors.
395 	 */
396 	addr_type = ipv6_addr_type(&hdr->daddr);
397 
398 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
399 		saddr = &hdr->daddr;
400 
401 	/*
402 	 *	Dest addr check
403 	 */
404 
405 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
406 		if (type != ICMPV6_PKT_TOOBIG &&
407 		    !(type == ICMPV6_PARAMPROB &&
408 		      code == ICMPV6_UNK_OPTION &&
409 		      (opt_unrec(skb, info))))
410 			return;
411 
412 		saddr = NULL;
413 	}
414 
415 	addr_type = ipv6_addr_type(&hdr->saddr);
416 
417 	/*
418 	 *	Source addr check
419 	 */
420 
421 	if (addr_type & IPV6_ADDR_LINKLOCAL)
422 		iif = skb->dev->ifindex;
423 
424 	/*
425 	 *	Must not send error if the source does not uniquely
426 	 *	identify a single node (RFC2463 Section 2.4).
427 	 *	We check unspecified / multicast addresses here,
428 	 *	and anycast addresses will be checked later.
429 	 */
430 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
431 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
432 		return;
433 	}
434 
435 	/*
436 	 *	Never reply to an ICMP error packet.
437 	 */
438 	if (is_ineligible(skb)) {
439 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
440 		return;
441 	}
442 
443 	mip6_addr_swap(skb);
444 
445 	memset(&fl6, 0, sizeof(fl6));
446 	fl6.flowi6_proto = IPPROTO_ICMPV6;
447 	ipv6_addr_copy(&fl6.daddr, &hdr->saddr);
448 	if (saddr)
449 		ipv6_addr_copy(&fl6.saddr, saddr);
450 	fl6.flowi6_oif = iif;
451 	fl6.fl6_icmp_type = type;
452 	fl6.fl6_icmp_code = code;
453 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
454 
455 	sk = icmpv6_xmit_lock(net);
456 	if (sk == NULL)
457 		return;
458 	np = inet6_sk(sk);
459 
460 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
461 		goto out;
462 
463 	tmp_hdr.icmp6_type = type;
464 	tmp_hdr.icmp6_code = code;
465 	tmp_hdr.icmp6_cksum = 0;
466 	tmp_hdr.icmp6_pointer = htonl(info);
467 
468 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
469 		fl6.flowi6_oif = np->mcast_oif;
470 
471 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
472 	if (IS_ERR(dst))
473 		goto out;
474 
475 	if (ipv6_addr_is_multicast(&fl6.daddr))
476 		hlimit = np->mcast_hops;
477 	else
478 		hlimit = np->hop_limit;
479 	if (hlimit < 0)
480 		hlimit = ip6_dst_hoplimit(dst);
481 
482 	msg.skb = skb;
483 	msg.offset = skb_network_offset(skb);
484 	msg.type = type;
485 
486 	len = skb->len - msg.offset;
487 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
488 	if (len < 0) {
489 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
490 		goto out_dst_release;
491 	}
492 
493 	rcu_read_lock();
494 	idev = __in6_dev_get(skb->dev);
495 
496 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
497 			      len + sizeof(struct icmp6hdr),
498 			      sizeof(struct icmp6hdr), hlimit,
499 			      np->tclass, NULL, &fl6, (struct rt6_info*)dst,
500 			      MSG_DONTWAIT, np->dontfrag);
501 	if (err) {
502 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
503 		ip6_flush_pending_frames(sk);
504 	} else {
505 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
506 						 len + sizeof(struct icmp6hdr));
507 	}
508 	rcu_read_unlock();
509 out_dst_release:
510 	dst_release(dst);
511 out:
512 	icmpv6_xmit_unlock(sk);
513 }
514 EXPORT_SYMBOL(icmpv6_send);
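
/*
 * Note the length clamp in icmpv6_send(): the quoted part of the
 * offending packet is cut so the whole error fits in IPV6_MIN_MTU (1280)
 * bytes including the IPv6 and ICMPv6 headers, keeping the reply within
 * the minimum MTU every IPv6 link must support (cf. RFC 4443 on error
 * message size).
 */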
515 
516 static void icmpv6_echo_reply(struct sk_buff *skb)
517 {
518 	struct net *net = dev_net(skb->dev);
519 	struct sock *sk;
520 	struct inet6_dev *idev;
521 	struct ipv6_pinfo *np;
522 	const struct in6_addr *saddr = NULL;
523 	struct icmp6hdr *icmph = icmp6_hdr(skb);
524 	struct icmp6hdr tmp_hdr;
525 	struct flowi6 fl6;
526 	struct icmpv6_msg msg;
527 	struct dst_entry *dst;
528 	int err = 0;
529 	int hlimit;
530 
531 	saddr = &ipv6_hdr(skb)->daddr;
532 
533 	if (!ipv6_unicast_destination(skb))
534 		saddr = NULL;
535 
536 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
537 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
538 
539 	memset(&fl6, 0, sizeof(fl6));
540 	fl6.flowi6_proto = IPPROTO_ICMPV6;
541 	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
542 	if (saddr)
543 		ipv6_addr_copy(&fl6.saddr, saddr);
544 	fl6.flowi6_oif = skb->dev->ifindex;
545 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
546 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
547 
548 	sk = icmpv6_xmit_lock(net);
549 	if (sk == NULL)
550 		return;
551 	np = inet6_sk(sk);
552 
553 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
554 		fl6.flowi6_oif = np->mcast_oif;
555 
556 	err = ip6_dst_lookup(sk, &dst, &fl6);
557 	if (err)
558 		goto out;
559 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
560 	if (IS_ERR(dst))
561 		goto out;
562 
563 	if (ipv6_addr_is_multicast(&fl6.daddr))
564 		hlimit = np->mcast_hops;
565 	else
566 		hlimit = np->hop_limit;
567 	if (hlimit < 0)
568 		hlimit = ip6_dst_hoplimit(dst);
569 
570 	idev = __in6_dev_get(skb->dev);
571 
572 	msg.skb = skb;
573 	msg.offset = 0;
574 	msg.type = ICMPV6_ECHO_REPLY;
575 
576 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
577 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
578 				(struct rt6_info*)dst, MSG_DONTWAIT,
579 				np->dontfrag);
580 
581 	if (err) {
582 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
583 		ip6_flush_pending_frames(sk);
584 	} else {
585 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
586 						 skb->len + sizeof(struct icmp6hdr));
587 	}
588 	dst_release(dst);
589 out:
590 	icmpv6_xmit_unlock(sk);
591 }
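
/*
 * Echo replies are simpler than errors: they are informational, so no
 * rate limiting or is_ineligible() check applies, the request's payload
 * is echoed back wholesale (msg.offset = 0), and the reply's source is
 * taken from the request's destination address when that looks like a
 * unicast address of ours (otherwise normal source selection applies).
 */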
592 
593 static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
594 {
595 	const struct inet6_protocol *ipprot;
596 	int inner_offset;
597 	int hash;
598 	u8 nexthdr;
599 
600 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
601 		return;
602 
603 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
604 	if (ipv6_ext_hdr(nexthdr)) {
605 		/* now skip over extension headers */
606 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
607 		if (inner_offset<0)
608 			return;
609 	} else {
610 		inner_offset = sizeof(struct ipv6hdr);
611 	}
612 
613 	/* Check the header, including 8 bytes of the inner protocol header. */
614 	if (!pskb_may_pull(skb, inner_offset+8))
615 		return;
616 
617 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
618 	   Without this we will not be able, e.g., to make source routed
619 	   pmtu discovery work.
620 	   Corresponding argument (opt) to notifiers is already added.
621 	   --ANK (980726)
622 	 */
623 
624 	hash = nexthdr & (MAX_INET_PROTOS - 1);
625 
626 	rcu_read_lock();
627 	ipprot = rcu_dereference(inet6_protos[hash]);
628 	if (ipprot && ipprot->err_handler)
629 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
630 	rcu_read_unlock();
631 
632 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
633 }
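
/*
 * icmpv6_notify() is the common error-delivery path: it locates the
 * upper-layer header inside the quoted packet (skipping any extension
 * headers), calls the registered protocol's err_handler (e.g. TCP or
 * UDP) and finally lets matching raw sockets see the error via
 * raw6_icmp_error().
 */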
634 
635 /*
636  *	Handle icmp messages
637  */
638 
639 static int icmpv6_rcv(struct sk_buff *skb)
640 {
641 	struct net_device *dev = skb->dev;
642 	struct inet6_dev *idev = __in6_dev_get(dev);
643 	const struct in6_addr *saddr, *daddr;
644 	const struct ipv6hdr *orig_hdr;
645 	struct icmp6hdr *hdr;
646 	u8 type;
647 
648 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
649 		struct sec_path *sp = skb_sec_path(skb);
650 		int nh;
651 
652 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
653 				 XFRM_STATE_ICMP))
654 			goto drop_no_count;
655 
656 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
657 			goto drop_no_count;
658 
659 		nh = skb_network_offset(skb);
660 		skb_set_network_header(skb, sizeof(*hdr));
661 
662 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
663 			goto drop_no_count;
664 
665 		skb_set_network_header(skb, nh);
666 	}
667 
668 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
669 
670 	saddr = &ipv6_hdr(skb)->saddr;
671 	daddr = &ipv6_hdr(skb)->daddr;
672 
673 	/* Perform checksum. */
674 	switch (skb->ip_summed) {
675 	case CHECKSUM_COMPLETE:
676 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
677 				     skb->csum))
678 			break;
679 		/* fall through */
680 	case CHECKSUM_NONE:
681 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
682 					     IPPROTO_ICMPV6, 0));
683 		if (__skb_checksum_complete(skb)) {
684 			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
685 				       saddr, daddr);
686 			goto discard_it;
687 		}
688 	}
689 
690 	if (!pskb_pull(skb, sizeof(*hdr)))
691 		goto discard_it;
692 
693 	hdr = icmp6_hdr(skb);
694 
695 	type = hdr->icmp6_type;
696 
697 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
698 
699 	switch (type) {
700 	case ICMPV6_ECHO_REQUEST:
701 		icmpv6_echo_reply(skb);
702 		break;
703 
704 	case ICMPV6_ECHO_REPLY:
705 		/* we couldn't care less */
706 		break;
707 
708 	case ICMPV6_PKT_TOOBIG:
709 		/* BUGGG_FUTURE: if the packet contains an rthdr, we cannot update
710 		   the standard destination cache. It seems only an "advanced"
711 		   destination cache would allow us to solve this problem.
712 		   --ANK (980726)
713 		 */
714 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
715 			goto discard_it;
716 		hdr = icmp6_hdr(skb);
717 		orig_hdr = (struct ipv6hdr *) (hdr + 1);
718 		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
719 				   ntohl(hdr->icmp6_mtu));
720 
721 		/*
722 		 *	Drop through to notify
723 		 */
724 
725 	case ICMPV6_DEST_UNREACH:
726 	case ICMPV6_TIME_EXCEED:
727 	case ICMPV6_PARAMPROB:
728 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
729 		break;
730 
731 	case NDISC_ROUTER_SOLICITATION:
732 	case NDISC_ROUTER_ADVERTISEMENT:
733 	case NDISC_NEIGHBOUR_SOLICITATION:
734 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
735 	case NDISC_REDIRECT:
736 		ndisc_rcv(skb);
737 		break;
738 
739 	case ICMPV6_MGM_QUERY:
740 		igmp6_event_query(skb);
741 		break;
742 
743 	case ICMPV6_MGM_REPORT:
744 		igmp6_event_report(skb);
745 		break;
746 
747 	case ICMPV6_MGM_REDUCTION:
748 	case ICMPV6_NI_QUERY:
749 	case ICMPV6_NI_REPLY:
750 	case ICMPV6_MLD2_REPORT:
751 	case ICMPV6_DHAAD_REQUEST:
752 	case ICMPV6_DHAAD_REPLY:
753 	case ICMPV6_MOBILE_PREFIX_SOL:
754 	case ICMPV6_MOBILE_PREFIX_ADV:
755 		break;
756 
757 	default:
758 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
759 
760 		/* informational */
761 		if (type & ICMPV6_INFOMSG_MASK)
762 			break;
763 
764 		/*
765 		 * An error of unknown type
766 		 * must be passed to the upper level.
767 		 */
768 
769 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
770 	}
771 
772 	kfree_skb(skb);
773 	return 0;
774 
775 discard_it:
776 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
777 drop_no_count:
778 	kfree_skb(skb);
779 	return 0;
780 }
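
/*
 * Receive-side summary: the checksum is validated against the IPv6
 * pseudo-header (cheaply for CHECKSUM_COMPLETE, otherwise via
 * __skb_checksum_complete()), then the message is dispatched on its
 * type: echo, PMTU/errors, NDISC, MLD and so on.  Unknown informational
 * types are ignored, while unknown errors are still passed up through
 * icmpv6_notify().
 */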
781 
782 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
783 		      u8 type,
784 		      const struct in6_addr *saddr,
785 		      const struct in6_addr *daddr,
786 		      int oif)
787 {
788 	memset(fl6, 0, sizeof(*fl6));
789 	ipv6_addr_copy(&fl6->saddr, saddr);
790 	ipv6_addr_copy(&fl6->daddr, daddr);
791 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
792 	fl6->fl6_icmp_type	= type;
793 	fl6->fl6_icmp_code	= 0;
794 	fl6->flowi6_oif		= oif;
795 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
796 }
797 
798 /*
799  * Special lock-class for __icmpv6_sk:
800  */
801 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
802 
803 static int __net_init icmpv6_sk_init(struct net *net)
804 {
805 	struct sock *sk;
806 	int err, i, j;
807 
808 	net->ipv6.icmp_sk =
809 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
810 	if (net->ipv6.icmp_sk == NULL)
811 		return -ENOMEM;
812 
813 	for_each_possible_cpu(i) {
814 		err = inet_ctl_sock_create(&sk, PF_INET6,
815 					   SOCK_RAW, IPPROTO_ICMPV6, net);
816 		if (err < 0) {
817 			printk(KERN_ERR
818 			       "Failed to initialize the ICMP6 control socket "
819 			       "(err %d).\n",
820 			       err);
821 			goto fail;
822 		}
823 
824 		net->ipv6.icmp_sk[i] = sk;
825 
826 		/*
827 		 * Split off their lock-class, because sk->sk_dst_lock
828 		 * gets used from softirqs, which is safe for
829 		 * __icmpv6_sk (because those never get directly used
830 		 * via userspace syscalls), but unsafe for normal sockets.
831 		 */
832 		lockdep_set_class(&sk->sk_dst_lock,
833 				  &icmpv6_socket_sk_dst_lock_key);
834 
835 		/* Enough space for 2 64K ICMP packets, including
836 		 * sk_buff struct overhead.
837 		 */
838 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
839 	}
840 	return 0;
841 
842  fail:
843 	for (j = 0; j < i; j++)
844 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
845 	kfree(net->ipv6.icmp_sk);
846 	return err;
847 }
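
/*
 * One control socket is created per possible CPU; combined with the
 * BH-disabled trylock in icmpv6_xmit_lock() this lets the error path run
 * from softirq context without cross-CPU contention.  The sndbuf sizing
 * above merely bounds how much ICMP output may be queued on each socket
 * at a time.
 */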
848 
849 static void __net_exit icmpv6_sk_exit(struct net *net)
850 {
851 	int i;
852 
853 	for_each_possible_cpu(i) {
854 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
855 	}
856 	kfree(net->ipv6.icmp_sk);
857 }
858 
859 static struct pernet_operations icmpv6_sk_ops = {
860        .init = icmpv6_sk_init,
861        .exit = icmpv6_sk_exit,
862 };
863 
864 int __init icmpv6_init(void)
865 {
866 	int err;
867 
868 	err = register_pernet_subsys(&icmpv6_sk_ops);
869 	if (err < 0)
870 		return err;
871 
872 	err = -EAGAIN;
873 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
874 		goto fail;
875 	return 0;
876 
877 fail:
878 	printk(KERN_ERR "Failed to register ICMP6 protocol\n");
879 	unregister_pernet_subsys(&icmpv6_sk_ops);
880 	return err;
881 }
882 
883 void icmpv6_cleanup(void)
884 {
885 	unregister_pernet_subsys(&icmpv6_sk_ops);
886 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
887 }
888 
889 
890 static const struct icmp6_err {
891 	int err;
892 	int fatal;
893 } tab_unreach[] = {
894 	{	/* NOROUTE */
895 		.err	= ENETUNREACH,
896 		.fatal	= 0,
897 	},
898 	{	/* ADM_PROHIBITED */
899 		.err	= EACCES,
900 		.fatal	= 1,
901 	},
902 	{	/* Was NOT_NEIGHBOUR, now reserved */
903 		.err	= EHOSTUNREACH,
904 		.fatal	= 0,
905 	},
906 	{	/* ADDR_UNREACH	*/
907 		.err	= EHOSTUNREACH,
908 		.fatal	= 0,
909 	},
910 	{	/* PORT_UNREACH	*/
911 		.err	= ECONNREFUSED,
912 		.fatal	= 1,
913 	},
914 };
915 
916 int icmpv6_err_convert(u8 type, u8 code, int *err)
917 {
918 	int fatal = 0;
919 
920 	*err = EPROTO;
921 
922 	switch (type) {
923 	case ICMPV6_DEST_UNREACH:
924 		fatal = 1;
925 		if (code <= ICMPV6_PORT_UNREACH) {
926 			*err  = tab_unreach[code].err;
927 			fatal = tab_unreach[code].fatal;
928 		}
929 		break;
930 
931 	case ICMPV6_PKT_TOOBIG:
932 		*err = EMSGSIZE;
933 		break;
934 
935 	case ICMPV6_PARAMPROB:
936 		*err = EPROTO;
937 		fatal = 1;
938 		break;
939 
940 	case ICMPV6_TIME_EXCEED:
941 		*err = EHOSTUNREACH;
942 		break;
943 	}
944 
945 	return fatal;
946 }
947 
948 EXPORT_SYMBOL(icmpv6_err_convert);
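
/*
 * Example of the mapping above: a Destination Unreachable with code
 * ICMPV6_PORT_UNREACH sets *err to ECONNREFUSED and returns 1 (fatal),
 * while code 0 (no route) yields ENETUNREACH and returns 0, letting
 * callers such as TCP decide whether to abort the connection or only
 * record a soft error.
 */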
949 
950 #ifdef CONFIG_SYSCTL
951 ctl_table ipv6_icmp_table_template[] = {
952 	{
953 		.procname	= "ratelimit",
954 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
955 		.maxlen		= sizeof(int),
956 		.mode		= 0644,
957 		.proc_handler	= proc_dointvec_ms_jiffies,
958 	},
959 	{ },
960 };
961 
962 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
963 {
964 	struct ctl_table *table;
965 
966 	table = kmemdup(ipv6_icmp_table_template,
967 			sizeof(ipv6_icmp_table_template),
968 			GFP_KERNEL);
969 
970 	if (table)
971 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
972 
973 	return table;
974 }
975 #endif
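
/*
 * The sysctl template above backs the per-namespace "ratelimit" knob
 * (normally visible as net.ipv6.icmp.ratelimit): the value is shown to
 * userspace in milliseconds and stored in jiffies by
 * proc_dointvec_ms_jiffies.  For example, something like
 *
 *	sysctl -w net.ipv6.icmp.ratelimit=1000
 *
 * would set the base interval used by icmpv6_xrlim_allow() to a second.
 */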
976 
977