xref: /openbmc/linux/net/ipv6/icmp.c (revision b94f1c09)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
 *	Andi Kleen		:	add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure a parameter problem is sent for
 *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/protocol.h>
61 #include <net/raw.h>
62 #include <net/rawv6.h>
63 #include <net/transp_v6.h>
64 #include <net/ip6_route.h>
65 #include <net/addrconf.h>
66 #include <net/icmp.h>
67 #include <net/xfrm.h>
68 #include <net/inet_common.h>
69 
70 #include <asm/uaccess.h>
71 
72 /*
73  *	The ICMP socket(s). This is the most convenient way to flow control
74  *	our ICMP output as well as maintain a clean interface throughout
75  *	all layers. All Socketless IP sends will soon be gone.
76  *
77  *	On SMP we have one ICMP socket per-cpu.
78  */
79 static inline struct sock *icmpv6_sk(struct net *net)
80 {
81 	return net->ipv6.icmp_sk[smp_processor_id()];
82 }
83 
84 static int icmpv6_rcv(struct sk_buff *skb);
85 
86 static const struct inet6_protocol icmpv6_protocol = {
87 	.handler	=	icmpv6_rcv,
88 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
89 };
90 
91 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
92 {
93 	struct sock *sk;
94 
95 	local_bh_disable();
96 
97 	sk = icmpv6_sk(net);
98 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
99 		/* This can happen if the output path (f.e. SIT or
100 		 * ip6ip6 tunnel) signals dst_link_failure() for an
101 		 * outgoing ICMP6 packet.
102 		 */
103 		local_bh_enable();
104 		return NULL;
105 	}
106 	return sk;
107 }
108 
109 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
110 {
111 	spin_unlock_bh(&sk->sk_lock.slock);
112 }
113 
114 /*
115  * Slightly more convenient version of icmpv6_send.
116  */
117 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
118 {
119 	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
120 	kfree_skb(skb);
121 }
122 
123 /*
124  * Figure out, may we reply to this packet with icmp error.
125  *
126  * We do not reply, if:
127  *	- it was icmp error message.
128  *	- it is truncated, so that it is known, that protocol is ICMPV6
129  *	  (i.e. in the middle of some exthdr)
130  *
131  *	--ANK (980726)
132  */
133 
134 static bool is_ineligible(const struct sk_buff *skb)
135 {
136 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
137 	int len = skb->len - ptr;
138 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
139 	__be16 frag_off;
140 
141 	if (len < 0)
142 		return true;
143 
144 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
145 	if (ptr < 0)
146 		return false;
147 	if (nexthdr == IPPROTO_ICMPV6) {
148 		u8 _type, *tp;
149 		tp = skb_header_pointer(skb,
150 			ptr+offsetof(struct icmp6hdr, icmp6_type),
151 			sizeof(_type), &_type);
152 		if (tp == NULL ||
153 		    !(*tp & ICMPV6_INFOMSG_MASK))
154 			return true;
155 	}
156 	return false;
157 }
158 
159 /*
160  * Check the ICMP output rate limit
161  */
162 static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
163 				      struct flowi6 *fl6)
164 {
165 	struct dst_entry *dst;
166 	struct net *net = sock_net(sk);
167 	bool res = false;
168 
169 	/* Informational messages are not limited. */
170 	if (type & ICMPV6_INFOMSG_MASK)
171 		return true;
172 
173 	/* Do not limit pmtu discovery, it would break it. */
174 	if (type == ICMPV6_PKT_TOOBIG)
175 		return true;
176 
177 	/*
178 	 * Look up the output route.
179 	 * XXX: perhaps the expire for routing entries cloned by
180 	 * this lookup should be more aggressive (not longer than timeout).
181 	 */
182 	dst = ip6_route_output(net, sk, fl6);
183 	if (dst->error) {
184 		IP6_INC_STATS(net, ip6_dst_idev(dst),
185 			      IPSTATS_MIB_OUTNOROUTES);
186 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
187 		res = true;
188 	} else {
189 		struct rt6_info *rt = (struct rt6_info *)dst;
190 		int tmo = net->ipv6.sysctl.icmpv6_time;
191 		struct inet_peer *peer;
192 
193 		/* Give more bandwidth to wider prefixes. */
194 		if (rt->rt6i_dst.plen < 128)
195 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
196 
197 		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
198 		res = inet_peer_xrlim_allow(peer, tmo);
199 		if (peer)
200 			inet_putpeer(peer);
201 	}
202 	dst_release(dst);
203 	return res;
204 }
205 
206 /*
207  *	an inline helper for the "simple" if statement below
208  *	checks if parameter problem report is caused by an
209  *	unrecognized IPv6 option that has the Option Type
210  *	highest-order two bits set to 10
211  */
212 
213 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
214 {
215 	u8 _optval, *op;
216 
217 	offset += skb_network_offset(skb);
218 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
219 	if (op == NULL)
220 		return true;
221 	return (*op & 0xC0) == 0x80;
222 }
223 
224 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
225 {
226 	struct sk_buff *skb;
227 	struct icmp6hdr *icmp6h;
228 	int err = 0;
229 
230 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
231 		goto out;
232 
233 	icmp6h = icmp6_hdr(skb);
234 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
235 	icmp6h->icmp6_cksum = 0;
236 
237 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
238 		skb->csum = csum_partial(icmp6h,
239 					sizeof(struct icmp6hdr), skb->csum);
240 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
241 						      &fl6->daddr,
242 						      len, fl6->flowi6_proto,
243 						      skb->csum);
244 	} else {
245 		__wsum tmp_csum = 0;
246 
247 		skb_queue_walk(&sk->sk_write_queue, skb) {
248 			tmp_csum = csum_add(tmp_csum, skb->csum);
249 		}
250 
251 		tmp_csum = csum_partial(icmp6h,
252 					sizeof(struct icmp6hdr), tmp_csum);
253 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
254 						      &fl6->daddr,
255 						      len, fl6->flowi6_proto,
256 						      tmp_csum);
257 	}
258 	ip6_push_pending_frames(sk);
259 out:
260 	return err;
261 }
262 
263 struct icmpv6_msg {
264 	struct sk_buff	*skb;
265 	int		offset;
266 	uint8_t		type;
267 };
268 
269 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
270 {
271 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
272 	struct sk_buff *org_skb = msg->skb;
273 	__wsum csum = 0;
274 
275 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
276 				      to, len, csum);
277 	skb->csum = csum_block_add(skb->csum, csum, odd);
278 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
279 		nf_ct_attach(skb, org_skb);
280 	return 0;
281 }
282 
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/*
 * If the packet carries a Home Address destination option, exchange the
 * address stored in that option with the source address of the IPv6
 * header.  Packets without the option, or in which the option cannot be
 * located, are left untouched.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct inet6_skb_parm *cb = IP6CB(skb);
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr saved;
	int hao_off;

	if (!cb->dsthao)
		return;

	hao_off = ipv6_find_tlv(skb, cb->dsthao, IPV6_TLV_HAO);
	if (unlikely(hao_off < 0))
		return;

	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + hao_off);

	saved = ip6h->saddr;
	ip6h->saddr = hao->addr;
	hao->addr = saved;
}
#else
/* Nothing to swap when Mobile IPv6 support is not configured. */
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
306 
307 static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
308 					     struct sock *sk, struct flowi6 *fl6)
309 {
310 	struct dst_entry *dst, *dst2;
311 	struct flowi6 fl2;
312 	int err;
313 
314 	err = ip6_dst_lookup(sk, &dst, fl6);
315 	if (err)
316 		return ERR_PTR(err);
317 
318 	/*
319 	 * We won't send icmp if the destination is known
320 	 * anycast.
321 	 */
322 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
323 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
324 		dst_release(dst);
325 		return ERR_PTR(-EINVAL);
326 	}
327 
328 	/* No need to clone since we're just using its address. */
329 	dst2 = dst;
330 
331 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
332 	if (!IS_ERR(dst)) {
333 		if (dst != dst2)
334 			return dst;
335 	} else {
336 		if (PTR_ERR(dst) == -EPERM)
337 			dst = NULL;
338 		else
339 			return dst;
340 	}
341 
342 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
343 	if (err)
344 		goto relookup_failed;
345 
346 	err = ip6_dst_lookup(sk, &dst2, &fl2);
347 	if (err)
348 		goto relookup_failed;
349 
350 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
351 	if (!IS_ERR(dst2)) {
352 		dst_release(dst);
353 		dst = dst2;
354 	} else {
355 		err = PTR_ERR(dst2);
356 		if (err == -EPERM) {
357 			dst_release(dst);
358 			return dst2;
359 		} else
360 			goto relookup_failed;
361 	}
362 
363 relookup_failed:
364 	if (dst)
365 		return dst;
366 	return ERR_PTR(err);
367 }
368 
369 /*
370  *	Send an ICMP message in response to a packet in error
371  */
372 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
373 {
374 	struct net *net = dev_net(skb->dev);
375 	struct inet6_dev *idev = NULL;
376 	struct ipv6hdr *hdr = ipv6_hdr(skb);
377 	struct sock *sk;
378 	struct ipv6_pinfo *np;
379 	const struct in6_addr *saddr = NULL;
380 	struct dst_entry *dst;
381 	struct icmp6hdr tmp_hdr;
382 	struct flowi6 fl6;
383 	struct icmpv6_msg msg;
384 	int iif = 0;
385 	int addr_type = 0;
386 	int len;
387 	int hlimit;
388 	int err = 0;
389 
390 	if ((u8 *)hdr < skb->head ||
391 	    (skb->network_header + sizeof(*hdr)) > skb->tail)
392 		return;
393 
394 	/*
395 	 *	Make sure we respect the rules
396 	 *	i.e. RFC 1885 2.4(e)
397 	 *	Rule (e.1) is enforced by not using icmpv6_send
398 	 *	in any code that processes icmp errors.
399 	 */
400 	addr_type = ipv6_addr_type(&hdr->daddr);
401 
402 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
403 		saddr = &hdr->daddr;
404 
405 	/*
406 	 *	Dest addr check
407 	 */
408 
409 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
410 		if (type != ICMPV6_PKT_TOOBIG &&
411 		    !(type == ICMPV6_PARAMPROB &&
412 		      code == ICMPV6_UNK_OPTION &&
413 		      (opt_unrec(skb, info))))
414 			return;
415 
416 		saddr = NULL;
417 	}
418 
419 	addr_type = ipv6_addr_type(&hdr->saddr);
420 
421 	/*
422 	 *	Source addr check
423 	 */
424 
425 	if (addr_type & IPV6_ADDR_LINKLOCAL)
426 		iif = skb->dev->ifindex;
427 
428 	/*
429 	 *	Must not send error if the source does not uniquely
430 	 *	identify a single node (RFC2463 Section 2.4).
431 	 *	We check unspecified / multicast addresses here,
432 	 *	and anycast addresses will be checked later.
433 	 */
434 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
435 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
436 		return;
437 	}
438 
439 	/*
440 	 *	Never answer to a ICMP packet.
441 	 */
442 	if (is_ineligible(skb)) {
443 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
444 		return;
445 	}
446 
447 	mip6_addr_swap(skb);
448 
449 	memset(&fl6, 0, sizeof(fl6));
450 	fl6.flowi6_proto = IPPROTO_ICMPV6;
451 	fl6.daddr = hdr->saddr;
452 	if (saddr)
453 		fl6.saddr = *saddr;
454 	fl6.flowi6_oif = iif;
455 	fl6.fl6_icmp_type = type;
456 	fl6.fl6_icmp_code = code;
457 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
458 
459 	sk = icmpv6_xmit_lock(net);
460 	if (sk == NULL)
461 		return;
462 	np = inet6_sk(sk);
463 
464 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
465 		goto out;
466 
467 	tmp_hdr.icmp6_type = type;
468 	tmp_hdr.icmp6_code = code;
469 	tmp_hdr.icmp6_cksum = 0;
470 	tmp_hdr.icmp6_pointer = htonl(info);
471 
472 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
473 		fl6.flowi6_oif = np->mcast_oif;
474 	else if (!fl6.flowi6_oif)
475 		fl6.flowi6_oif = np->ucast_oif;
476 
477 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
478 	if (IS_ERR(dst))
479 		goto out;
480 
481 	if (ipv6_addr_is_multicast(&fl6.daddr))
482 		hlimit = np->mcast_hops;
483 	else
484 		hlimit = np->hop_limit;
485 	if (hlimit < 0)
486 		hlimit = ip6_dst_hoplimit(dst);
487 
488 	msg.skb = skb;
489 	msg.offset = skb_network_offset(skb);
490 	msg.type = type;
491 
492 	len = skb->len - msg.offset;
493 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
494 	if (len < 0) {
495 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
496 		goto out_dst_release;
497 	}
498 
499 	rcu_read_lock();
500 	idev = __in6_dev_get(skb->dev);
501 
502 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
503 			      len + sizeof(struct icmp6hdr),
504 			      sizeof(struct icmp6hdr), hlimit,
505 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
506 			      MSG_DONTWAIT, np->dontfrag);
507 	if (err) {
508 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
509 		ip6_flush_pending_frames(sk);
510 	} else {
511 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
512 						 len + sizeof(struct icmp6hdr));
513 	}
514 	rcu_read_unlock();
515 out_dst_release:
516 	dst_release(dst);
517 out:
518 	icmpv6_xmit_unlock(sk);
519 }
520 EXPORT_SYMBOL(icmpv6_send);
521 
522 static void icmpv6_echo_reply(struct sk_buff *skb)
523 {
524 	struct net *net = dev_net(skb->dev);
525 	struct sock *sk;
526 	struct inet6_dev *idev;
527 	struct ipv6_pinfo *np;
528 	const struct in6_addr *saddr = NULL;
529 	struct icmp6hdr *icmph = icmp6_hdr(skb);
530 	struct icmp6hdr tmp_hdr;
531 	struct flowi6 fl6;
532 	struct icmpv6_msg msg;
533 	struct dst_entry *dst;
534 	int err = 0;
535 	int hlimit;
536 
537 	saddr = &ipv6_hdr(skb)->daddr;
538 
539 	if (!ipv6_unicast_destination(skb))
540 		saddr = NULL;
541 
542 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
543 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
544 
545 	memset(&fl6, 0, sizeof(fl6));
546 	fl6.flowi6_proto = IPPROTO_ICMPV6;
547 	fl6.daddr = ipv6_hdr(skb)->saddr;
548 	if (saddr)
549 		fl6.saddr = *saddr;
550 	fl6.flowi6_oif = skb->dev->ifindex;
551 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
552 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
553 
554 	sk = icmpv6_xmit_lock(net);
555 	if (sk == NULL)
556 		return;
557 	np = inet6_sk(sk);
558 
559 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
560 		fl6.flowi6_oif = np->mcast_oif;
561 	else if (!fl6.flowi6_oif)
562 		fl6.flowi6_oif = np->ucast_oif;
563 
564 	err = ip6_dst_lookup(sk, &dst, &fl6);
565 	if (err)
566 		goto out;
567 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
568 	if (IS_ERR(dst))
569 		goto out;
570 
571 	if (ipv6_addr_is_multicast(&fl6.daddr))
572 		hlimit = np->mcast_hops;
573 	else
574 		hlimit = np->hop_limit;
575 	if (hlimit < 0)
576 		hlimit = ip6_dst_hoplimit(dst);
577 
578 	idev = __in6_dev_get(skb->dev);
579 
580 	msg.skb = skb;
581 	msg.offset = 0;
582 	msg.type = ICMPV6_ECHO_REPLY;
583 
584 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
585 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
586 				(struct rt6_info *)dst, MSG_DONTWAIT,
587 				np->dontfrag);
588 
589 	if (err) {
590 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
591 		ip6_flush_pending_frames(sk);
592 	} else {
593 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
594 						 skb->len + sizeof(struct icmp6hdr));
595 	}
596 	dst_release(dst);
597 out:
598 	icmpv6_xmit_unlock(sk);
599 }
600 
601 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
602 {
603 	const struct inet6_protocol *ipprot;
604 	int inner_offset;
605 	__be16 frag_off;
606 	u8 nexthdr;
607 
608 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
609 		return;
610 
611 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
612 	if (ipv6_ext_hdr(nexthdr)) {
613 		/* now skip over extension headers */
614 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
615 						&nexthdr, &frag_off);
616 		if (inner_offset<0)
617 			return;
618 	} else {
619 		inner_offset = sizeof(struct ipv6hdr);
620 	}
621 
622 	/* Checkin header including 8 bytes of inner protocol header. */
623 	if (!pskb_may_pull(skb, inner_offset+8))
624 		return;
625 
626 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
627 	   Without this we will not able f.e. to make source routed
628 	   pmtu discovery.
629 	   Corresponding argument (opt) to notifiers is already added.
630 	   --ANK (980726)
631 	 */
632 
633 	rcu_read_lock();
634 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
635 	if (ipprot && ipprot->err_handler)
636 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
637 	rcu_read_unlock();
638 
639 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
640 }
641 
642 /*
643  *	Handle icmp messages
644  */
645 
646 static int icmpv6_rcv(struct sk_buff *skb)
647 {
648 	struct net_device *dev = skb->dev;
649 	struct inet6_dev *idev = __in6_dev_get(dev);
650 	const struct in6_addr *saddr, *daddr;
651 	struct icmp6hdr *hdr;
652 	u8 type;
653 
654 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
655 		struct sec_path *sp = skb_sec_path(skb);
656 		int nh;
657 
658 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
659 				 XFRM_STATE_ICMP))
660 			goto drop_no_count;
661 
662 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
663 			goto drop_no_count;
664 
665 		nh = skb_network_offset(skb);
666 		skb_set_network_header(skb, sizeof(*hdr));
667 
668 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
669 			goto drop_no_count;
670 
671 		skb_set_network_header(skb, nh);
672 	}
673 
674 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
675 
676 	saddr = &ipv6_hdr(skb)->saddr;
677 	daddr = &ipv6_hdr(skb)->daddr;
678 
679 	/* Perform checksum. */
680 	switch (skb->ip_summed) {
681 	case CHECKSUM_COMPLETE:
682 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
683 				     skb->csum))
684 			break;
685 		/* fall through */
686 	case CHECKSUM_NONE:
687 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
688 					     IPPROTO_ICMPV6, 0));
689 		if (__skb_checksum_complete(skb)) {
690 			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
691 				       saddr, daddr);
692 			goto discard_it;
693 		}
694 	}
695 
696 	if (!pskb_pull(skb, sizeof(*hdr)))
697 		goto discard_it;
698 
699 	hdr = icmp6_hdr(skb);
700 
701 	type = hdr->icmp6_type;
702 
703 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
704 
705 	switch (type) {
706 	case ICMPV6_ECHO_REQUEST:
707 		icmpv6_echo_reply(skb);
708 		break;
709 
710 	case ICMPV6_ECHO_REPLY:
711 		/* we couldn't care less */
712 		break;
713 
714 	case ICMPV6_PKT_TOOBIG:
715 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
716 		   standard destination cache. Seems, only "advanced"
717 		   destination cache will allow to solve this problem
718 		   --ANK (980726)
719 		 */
720 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
721 			goto discard_it;
722 		hdr = icmp6_hdr(skb);
723 
724 		/*
725 		 *	Drop through to notify
726 		 */
727 
728 	case ICMPV6_DEST_UNREACH:
729 	case ICMPV6_TIME_EXCEED:
730 	case ICMPV6_PARAMPROB:
731 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
732 		break;
733 
734 	case NDISC_ROUTER_SOLICITATION:
735 	case NDISC_ROUTER_ADVERTISEMENT:
736 	case NDISC_NEIGHBOUR_SOLICITATION:
737 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
738 	case NDISC_REDIRECT:
739 		ndisc_rcv(skb);
740 		break;
741 
742 	case ICMPV6_MGM_QUERY:
743 		igmp6_event_query(skb);
744 		break;
745 
746 	case ICMPV6_MGM_REPORT:
747 		igmp6_event_report(skb);
748 		break;
749 
750 	case ICMPV6_MGM_REDUCTION:
751 	case ICMPV6_NI_QUERY:
752 	case ICMPV6_NI_REPLY:
753 	case ICMPV6_MLD2_REPORT:
754 	case ICMPV6_DHAAD_REQUEST:
755 	case ICMPV6_DHAAD_REPLY:
756 	case ICMPV6_MOBILE_PREFIX_SOL:
757 	case ICMPV6_MOBILE_PREFIX_ADV:
758 		break;
759 
760 	default:
761 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
762 
763 		/* informational */
764 		if (type & ICMPV6_INFOMSG_MASK)
765 			break;
766 
767 		/*
768 		 * error of unknown type.
769 		 * must pass to upper level
770 		 */
771 
772 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
773 	}
774 
775 	kfree_skb(skb);
776 	return 0;
777 
778 discard_it:
779 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
780 drop_no_count:
781 	kfree_skb(skb);
782 	return 0;
783 }
784 
785 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
786 		      u8 type,
787 		      const struct in6_addr *saddr,
788 		      const struct in6_addr *daddr,
789 		      int oif)
790 {
791 	memset(fl6, 0, sizeof(*fl6));
792 	fl6->saddr = *saddr;
793 	fl6->daddr = *daddr;
794 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
795 	fl6->fl6_icmp_type	= type;
796 	fl6->fl6_icmp_code	= 0;
797 	fl6->flowi6_oif		= oif;
798 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
799 }
800 
801 /*
802  * Special lock-class for __icmpv6_sk:
803  */
804 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
805 
806 static int __net_init icmpv6_sk_init(struct net *net)
807 {
808 	struct sock *sk;
809 	int err, i, j;
810 
811 	net->ipv6.icmp_sk =
812 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
813 	if (net->ipv6.icmp_sk == NULL)
814 		return -ENOMEM;
815 
816 	for_each_possible_cpu(i) {
817 		err = inet_ctl_sock_create(&sk, PF_INET6,
818 					   SOCK_RAW, IPPROTO_ICMPV6, net);
819 		if (err < 0) {
820 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
821 			       err);
822 			goto fail;
823 		}
824 
825 		net->ipv6.icmp_sk[i] = sk;
826 
827 		/*
828 		 * Split off their lock-class, because sk->sk_dst_lock
829 		 * gets used from softirqs, which is safe for
830 		 * __icmpv6_sk (because those never get directly used
831 		 * via userspace syscalls), but unsafe for normal sockets.
832 		 */
833 		lockdep_set_class(&sk->sk_dst_lock,
834 				  &icmpv6_socket_sk_dst_lock_key);
835 
836 		/* Enough space for 2 64K ICMP packets, including
837 		 * sk_buff struct overhead.
838 		 */
839 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
840 	}
841 	return 0;
842 
843  fail:
844 	for (j = 0; j < i; j++)
845 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
846 	kfree(net->ipv6.icmp_sk);
847 	return err;
848 }
849 
850 static void __net_exit icmpv6_sk_exit(struct net *net)
851 {
852 	int i;
853 
854 	for_each_possible_cpu(i) {
855 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
856 	}
857 	kfree(net->ipv6.icmp_sk);
858 }
859 
860 static struct pernet_operations icmpv6_sk_ops = {
861        .init = icmpv6_sk_init,
862        .exit = icmpv6_sk_exit,
863 };
864 
865 int __init icmpv6_init(void)
866 {
867 	int err;
868 
869 	err = register_pernet_subsys(&icmpv6_sk_ops);
870 	if (err < 0)
871 		return err;
872 
873 	err = -EAGAIN;
874 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
875 		goto fail;
876 	return 0;
877 
878 fail:
879 	pr_err("Failed to register ICMP6 protocol\n");
880 	unregister_pernet_subsys(&icmpv6_sk_ops);
881 	return err;
882 }
883 
884 void icmpv6_cleanup(void)
885 {
886 	unregister_pernet_subsys(&icmpv6_sk_ops);
887 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
888 }
889 
890 
/*
 * Errno and fatality for each Destination Unreachable code, indexed by
 * the ICMPv6 code; used by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
};
916 
917 int icmpv6_err_convert(u8 type, u8 code, int *err)
918 {
919 	int fatal = 0;
920 
921 	*err = EPROTO;
922 
923 	switch (type) {
924 	case ICMPV6_DEST_UNREACH:
925 		fatal = 1;
926 		if (code <= ICMPV6_PORT_UNREACH) {
927 			*err  = tab_unreach[code].err;
928 			fatal = tab_unreach[code].fatal;
929 		}
930 		break;
931 
932 	case ICMPV6_PKT_TOOBIG:
933 		*err = EMSGSIZE;
934 		break;
935 
936 	case ICMPV6_PARAMPROB:
937 		*err = EPROTO;
938 		fatal = 1;
939 		break;
940 
941 	case ICMPV6_TIME_EXCEED:
942 		*err = EHOSTUNREACH;
943 		break;
944 	}
945 
946 	return fatal;
947 }
948 EXPORT_SYMBOL(icmpv6_err_convert);
949 
950 #ifdef CONFIG_SYSCTL
951 ctl_table ipv6_icmp_table_template[] = {
952 	{
953 		.procname	= "ratelimit",
954 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
955 		.maxlen		= sizeof(int),
956 		.mode		= 0644,
957 		.proc_handler	= proc_dointvec_ms_jiffies,
958 	},
959 	{ },
960 };
961 
962 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
963 {
964 	struct ctl_table *table;
965 
966 	table = kmemdup(ipv6_icmp_table_template,
967 			sizeof(ipv6_icmp_table_template),
968 			GFP_KERNEL);
969 
970 	if (table)
971 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
972 
973 	return table;
974 }
975 #endif
976 
977