xref: /openbmc/linux/net/ipv6/icmp.c (revision c4062dfc)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #include <linux/module.h>
33 #include <linux/errno.h>
34 #include <linux/types.h>
35 #include <linux/socket.h>
36 #include <linux/in.h>
37 #include <linux/kernel.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/skbuff.h>
41 #include <linux/init.h>
42 #include <linux/netfilter.h>
43 #include <linux/slab.h>
44 
45 #ifdef CONFIG_SYSCTL
46 #include <linux/sysctl.h>
47 #endif
48 
49 #include <linux/inet.h>
50 #include <linux/netdevice.h>
51 #include <linux/icmpv6.h>
52 
53 #include <net/ip.h>
54 #include <net/sock.h>
55 
56 #include <net/ipv6.h>
57 #include <net/ip6_checksum.h>
58 #include <net/protocol.h>
59 #include <net/raw.h>
60 #include <net/rawv6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
70 
71 /*
72  *	The ICMP socket(s). This is the most convenient way to flow control
73  *	our ICMP output as well as maintain a clean interface throughout
74  *	all layers. All Socketless IP sends will soon be gone.
75  *
76  *	On SMP we have one ICMP socket per-cpu.
77  */
78 static inline struct sock *icmpv6_sk(struct net *net)
79 {
80 	return net->ipv6.icmp_sk[smp_processor_id()];
81 }
82 
83 static int icmpv6_rcv(struct sk_buff *skb);
84 
85 static const struct inet6_protocol icmpv6_protocol = {
86 	.handler	=	icmpv6_rcv,
87 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
88 };
89 
90 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
91 {
92 	struct sock *sk;
93 
94 	local_bh_disable();
95 
96 	sk = icmpv6_sk(net);
97 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
98 		/* This can happen if the output path (f.e. SIT or
99 		 * ip6ip6 tunnel) signals dst_link_failure() for an
100 		 * outgoing ICMP6 packet.
101 		 */
102 		local_bh_enable();
103 		return NULL;
104 	}
105 	return sk;
106 }
107 
108 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
109 {
110 	spin_unlock_bh(&sk->sk_lock.slock);
111 }
112 
113 /*
114  * Slightly more convenient version of icmpv6_send.
115  */
116 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
117 {
118 	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
119 	kfree_skb(skb);
120 }
121 
122 /*
123  * Figure out, may we reply to this packet with icmp error.
124  *
125  * We do not reply, if:
126  *	- it was icmp error message.
127  *	- it is truncated, so that it is known, that protocol is ICMPV6
128  *	  (i.e. in the middle of some exthdr)
129  *
130  *	--ANK (980726)
131  */
132 
133 static int is_ineligible(struct sk_buff *skb)
134 {
135 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
136 	int len = skb->len - ptr;
137 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
138 	__be16 frag_off;
139 
140 	if (len < 0)
141 		return 1;
142 
143 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
144 	if (ptr < 0)
145 		return 0;
146 	if (nexthdr == IPPROTO_ICMPV6) {
147 		u8 _type, *tp;
148 		tp = skb_header_pointer(skb,
149 			ptr+offsetof(struct icmp6hdr, icmp6_type),
150 			sizeof(_type), &_type);
151 		if (tp == NULL ||
152 		    !(*tp & ICMPV6_INFOMSG_MASK))
153 			return 1;
154 	}
155 	return 0;
156 }
157 
158 /*
159  * Check the ICMP output rate limit
160  */
161 static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
162 				      struct flowi6 *fl6)
163 {
164 	struct dst_entry *dst;
165 	struct net *net = sock_net(sk);
166 	bool res = false;
167 
168 	/* Informational messages are not limited. */
169 	if (type & ICMPV6_INFOMSG_MASK)
170 		return true;
171 
172 	/* Do not limit pmtu discovery, it would break it. */
173 	if (type == ICMPV6_PKT_TOOBIG)
174 		return true;
175 
176 	/*
177 	 * Look up the output route.
178 	 * XXX: perhaps the expire for routing entries cloned by
179 	 * this lookup should be more aggressive (not longer than timeout).
180 	 */
181 	dst = ip6_route_output(net, sk, fl6);
182 	if (dst->error) {
183 		IP6_INC_STATS(net, ip6_dst_idev(dst),
184 			      IPSTATS_MIB_OUTNOROUTES);
185 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
186 		res = true;
187 	} else {
188 		struct rt6_info *rt = (struct rt6_info *)dst;
189 		int tmo = net->ipv6.sysctl.icmpv6_time;
190 
191 		/* Give more bandwidth to wider prefixes. */
192 		if (rt->rt6i_dst.plen < 128)
193 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
194 
195 		if (!rt->rt6i_peer)
196 			rt6_bind_peer(rt, 1);
197 		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
198 	}
199 	dst_release(dst);
200 	return res;
201 }
202 
203 /*
204  *	an inline helper for the "simple" if statement below
205  *	checks if parameter problem report is caused by an
206  *	unrecognized IPv6 option that has the Option Type
207  *	highest-order two bits set to 10
208  */
209 
210 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
211 {
212 	u8 _optval, *op;
213 
214 	offset += skb_network_offset(skb);
215 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
216 	if (op == NULL)
217 		return 1;
218 	return (*op & 0xC0) == 0x80;
219 }
220 
221 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
222 {
223 	struct sk_buff *skb;
224 	struct icmp6hdr *icmp6h;
225 	int err = 0;
226 
227 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
228 		goto out;
229 
230 	icmp6h = icmp6_hdr(skb);
231 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
232 	icmp6h->icmp6_cksum = 0;
233 
234 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
235 		skb->csum = csum_partial(icmp6h,
236 					sizeof(struct icmp6hdr), skb->csum);
237 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
238 						      &fl6->daddr,
239 						      len, fl6->flowi6_proto,
240 						      skb->csum);
241 	} else {
242 		__wsum tmp_csum = 0;
243 
244 		skb_queue_walk(&sk->sk_write_queue, skb) {
245 			tmp_csum = csum_add(tmp_csum, skb->csum);
246 		}
247 
248 		tmp_csum = csum_partial(icmp6h,
249 					sizeof(struct icmp6hdr), tmp_csum);
250 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
251 						      &fl6->daddr,
252 						      len, fl6->flowi6_proto,
253 						      tmp_csum);
254 	}
255 	ip6_push_pending_frames(sk);
256 out:
257 	return err;
258 }
259 
260 struct icmpv6_msg {
261 	struct sk_buff	*skb;
262 	int		offset;
263 	uint8_t		type;
264 };
265 
266 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
267 {
268 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
269 	struct sk_buff *org_skb = msg->skb;
270 	__wsum csum = 0;
271 
272 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
273 				      to, len, csum);
274 	skb->csum = csum_block_add(skb->csum, csum, odd);
275 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
276 		nf_ct_attach(skb, org_skb);
277 	return 0;
278 }
279 
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/*
 * If the packet carries a Home Address destination option, exchange
 * the address stored in that option with the IPv6 source address.
 * Packets without the option are left alone.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr saved;
	int off;

	if (!opt->dsthao)
		return;

	off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
	if (unlikely(off < 0))
		return;

	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + off);

	saved = iph->saddr;
	iph->saddr = hao->addr;
	hao->addr = saved;
}
#else
/* Without MIP6 support there is nothing to swap. */
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
303 
304 static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
305 					     struct sock *sk, struct flowi6 *fl6)
306 {
307 	struct dst_entry *dst, *dst2;
308 	struct flowi6 fl2;
309 	int err;
310 
311 	err = ip6_dst_lookup(sk, &dst, fl6);
312 	if (err)
313 		return ERR_PTR(err);
314 
315 	/*
316 	 * We won't send icmp if the destination is known
317 	 * anycast.
318 	 */
319 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
320 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
321 		dst_release(dst);
322 		return ERR_PTR(-EINVAL);
323 	}
324 
325 	/* No need to clone since we're just using its address. */
326 	dst2 = dst;
327 
328 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
329 	if (!IS_ERR(dst)) {
330 		if (dst != dst2)
331 			return dst;
332 	} else {
333 		if (PTR_ERR(dst) == -EPERM)
334 			dst = NULL;
335 		else
336 			return dst;
337 	}
338 
339 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
340 	if (err)
341 		goto relookup_failed;
342 
343 	err = ip6_dst_lookup(sk, &dst2, &fl2);
344 	if (err)
345 		goto relookup_failed;
346 
347 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
348 	if (!IS_ERR(dst2)) {
349 		dst_release(dst);
350 		dst = dst2;
351 	} else {
352 		err = PTR_ERR(dst2);
353 		if (err == -EPERM) {
354 			dst_release(dst);
355 			return dst2;
356 		} else
357 			goto relookup_failed;
358 	}
359 
360 relookup_failed:
361 	if (dst)
362 		return dst;
363 	return ERR_PTR(err);
364 }
365 
366 /*
367  *	Send an ICMP message in response to a packet in error
368  */
369 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
370 {
371 	struct net *net = dev_net(skb->dev);
372 	struct inet6_dev *idev = NULL;
373 	struct ipv6hdr *hdr = ipv6_hdr(skb);
374 	struct sock *sk;
375 	struct ipv6_pinfo *np;
376 	const struct in6_addr *saddr = NULL;
377 	struct dst_entry *dst;
378 	struct icmp6hdr tmp_hdr;
379 	struct flowi6 fl6;
380 	struct icmpv6_msg msg;
381 	int iif = 0;
382 	int addr_type = 0;
383 	int len;
384 	int hlimit;
385 	int err = 0;
386 
387 	if ((u8 *)hdr < skb->head ||
388 	    (skb->network_header + sizeof(*hdr)) > skb->tail)
389 		return;
390 
391 	/*
392 	 *	Make sure we respect the rules
393 	 *	i.e. RFC 1885 2.4(e)
394 	 *	Rule (e.1) is enforced by not using icmpv6_send
395 	 *	in any code that processes icmp errors.
396 	 */
397 	addr_type = ipv6_addr_type(&hdr->daddr);
398 
399 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
400 		saddr = &hdr->daddr;
401 
402 	/*
403 	 *	Dest addr check
404 	 */
405 
406 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
407 		if (type != ICMPV6_PKT_TOOBIG &&
408 		    !(type == ICMPV6_PARAMPROB &&
409 		      code == ICMPV6_UNK_OPTION &&
410 		      (opt_unrec(skb, info))))
411 			return;
412 
413 		saddr = NULL;
414 	}
415 
416 	addr_type = ipv6_addr_type(&hdr->saddr);
417 
418 	/*
419 	 *	Source addr check
420 	 */
421 
422 	if (addr_type & IPV6_ADDR_LINKLOCAL)
423 		iif = skb->dev->ifindex;
424 
425 	/*
426 	 *	Must not send error if the source does not uniquely
427 	 *	identify a single node (RFC2463 Section 2.4).
428 	 *	We check unspecified / multicast addresses here,
429 	 *	and anycast addresses will be checked later.
430 	 */
431 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
432 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
433 		return;
434 	}
435 
436 	/*
437 	 *	Never answer to a ICMP packet.
438 	 */
439 	if (is_ineligible(skb)) {
440 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
441 		return;
442 	}
443 
444 	mip6_addr_swap(skb);
445 
446 	memset(&fl6, 0, sizeof(fl6));
447 	fl6.flowi6_proto = IPPROTO_ICMPV6;
448 	fl6.daddr = hdr->saddr;
449 	if (saddr)
450 		fl6.saddr = *saddr;
451 	fl6.flowi6_oif = iif;
452 	fl6.fl6_icmp_type = type;
453 	fl6.fl6_icmp_code = code;
454 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
455 
456 	sk = icmpv6_xmit_lock(net);
457 	if (sk == NULL)
458 		return;
459 	np = inet6_sk(sk);
460 
461 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
462 		goto out;
463 
464 	tmp_hdr.icmp6_type = type;
465 	tmp_hdr.icmp6_code = code;
466 	tmp_hdr.icmp6_cksum = 0;
467 	tmp_hdr.icmp6_pointer = htonl(info);
468 
469 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
470 		fl6.flowi6_oif = np->mcast_oif;
471 	else if (!fl6.flowi6_oif)
472 		fl6.flowi6_oif = np->ucast_oif;
473 
474 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
475 	if (IS_ERR(dst))
476 		goto out;
477 
478 	if (ipv6_addr_is_multicast(&fl6.daddr))
479 		hlimit = np->mcast_hops;
480 	else
481 		hlimit = np->hop_limit;
482 	if (hlimit < 0)
483 		hlimit = ip6_dst_hoplimit(dst);
484 
485 	msg.skb = skb;
486 	msg.offset = skb_network_offset(skb);
487 	msg.type = type;
488 
489 	len = skb->len - msg.offset;
490 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
491 	if (len < 0) {
492 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
493 		goto out_dst_release;
494 	}
495 
496 	rcu_read_lock();
497 	idev = __in6_dev_get(skb->dev);
498 
499 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
500 			      len + sizeof(struct icmp6hdr),
501 			      sizeof(struct icmp6hdr), hlimit,
502 			      np->tclass, NULL, &fl6, (struct rt6_info*)dst,
503 			      MSG_DONTWAIT, np->dontfrag);
504 	if (err) {
505 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
506 		ip6_flush_pending_frames(sk);
507 	} else {
508 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
509 						 len + sizeof(struct icmp6hdr));
510 	}
511 	rcu_read_unlock();
512 out_dst_release:
513 	dst_release(dst);
514 out:
515 	icmpv6_xmit_unlock(sk);
516 }
517 EXPORT_SYMBOL(icmpv6_send);
518 
519 static void icmpv6_echo_reply(struct sk_buff *skb)
520 {
521 	struct net *net = dev_net(skb->dev);
522 	struct sock *sk;
523 	struct inet6_dev *idev;
524 	struct ipv6_pinfo *np;
525 	const struct in6_addr *saddr = NULL;
526 	struct icmp6hdr *icmph = icmp6_hdr(skb);
527 	struct icmp6hdr tmp_hdr;
528 	struct flowi6 fl6;
529 	struct icmpv6_msg msg;
530 	struct dst_entry *dst;
531 	int err = 0;
532 	int hlimit;
533 
534 	saddr = &ipv6_hdr(skb)->daddr;
535 
536 	if (!ipv6_unicast_destination(skb))
537 		saddr = NULL;
538 
539 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
540 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
541 
542 	memset(&fl6, 0, sizeof(fl6));
543 	fl6.flowi6_proto = IPPROTO_ICMPV6;
544 	fl6.daddr = ipv6_hdr(skb)->saddr;
545 	if (saddr)
546 		fl6.saddr = *saddr;
547 	fl6.flowi6_oif = skb->dev->ifindex;
548 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
549 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
550 
551 	sk = icmpv6_xmit_lock(net);
552 	if (sk == NULL)
553 		return;
554 	np = inet6_sk(sk);
555 
556 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
557 		fl6.flowi6_oif = np->mcast_oif;
558 	else if (!fl6.flowi6_oif)
559 		fl6.flowi6_oif = np->ucast_oif;
560 
561 	err = ip6_dst_lookup(sk, &dst, &fl6);
562 	if (err)
563 		goto out;
564 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
565 	if (IS_ERR(dst))
566 		goto out;
567 
568 	if (ipv6_addr_is_multicast(&fl6.daddr))
569 		hlimit = np->mcast_hops;
570 	else
571 		hlimit = np->hop_limit;
572 	if (hlimit < 0)
573 		hlimit = ip6_dst_hoplimit(dst);
574 
575 	idev = __in6_dev_get(skb->dev);
576 
577 	msg.skb = skb;
578 	msg.offset = 0;
579 	msg.type = ICMPV6_ECHO_REPLY;
580 
581 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
582 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
583 				(struct rt6_info*)dst, MSG_DONTWAIT,
584 				np->dontfrag);
585 
586 	if (err) {
587 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
588 		ip6_flush_pending_frames(sk);
589 	} else {
590 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
591 						 skb->len + sizeof(struct icmp6hdr));
592 	}
593 	dst_release(dst);
594 out:
595 	icmpv6_xmit_unlock(sk);
596 }
597 
598 static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
599 {
600 	const struct inet6_protocol *ipprot;
601 	int inner_offset;
602 	int hash;
603 	u8 nexthdr;
604 	__be16 frag_off;
605 
606 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
607 		return;
608 
609 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
610 	if (ipv6_ext_hdr(nexthdr)) {
611 		/* now skip over extension headers */
612 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
613 						&nexthdr, &frag_off);
614 		if (inner_offset<0)
615 			return;
616 	} else {
617 		inner_offset = sizeof(struct ipv6hdr);
618 	}
619 
620 	/* Checkin header including 8 bytes of inner protocol header. */
621 	if (!pskb_may_pull(skb, inner_offset+8))
622 		return;
623 
624 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
625 	   Without this we will not able f.e. to make source routed
626 	   pmtu discovery.
627 	   Corresponding argument (opt) to notifiers is already added.
628 	   --ANK (980726)
629 	 */
630 
631 	hash = nexthdr & (MAX_INET_PROTOS - 1);
632 
633 	rcu_read_lock();
634 	ipprot = rcu_dereference(inet6_protos[hash]);
635 	if (ipprot && ipprot->err_handler)
636 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
637 	rcu_read_unlock();
638 
639 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
640 }
641 
642 /*
643  *	Handle icmp messages
644  */
645 
646 static int icmpv6_rcv(struct sk_buff *skb)
647 {
648 	struct net_device *dev = skb->dev;
649 	struct inet6_dev *idev = __in6_dev_get(dev);
650 	const struct in6_addr *saddr, *daddr;
651 	const struct ipv6hdr *orig_hdr;
652 	struct icmp6hdr *hdr;
653 	u8 type;
654 
655 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
656 		struct sec_path *sp = skb_sec_path(skb);
657 		int nh;
658 
659 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
660 				 XFRM_STATE_ICMP))
661 			goto drop_no_count;
662 
663 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
664 			goto drop_no_count;
665 
666 		nh = skb_network_offset(skb);
667 		skb_set_network_header(skb, sizeof(*hdr));
668 
669 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
670 			goto drop_no_count;
671 
672 		skb_set_network_header(skb, nh);
673 	}
674 
675 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
676 
677 	saddr = &ipv6_hdr(skb)->saddr;
678 	daddr = &ipv6_hdr(skb)->daddr;
679 
680 	/* Perform checksum. */
681 	switch (skb->ip_summed) {
682 	case CHECKSUM_COMPLETE:
683 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
684 				     skb->csum))
685 			break;
686 		/* fall through */
687 	case CHECKSUM_NONE:
688 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
689 					     IPPROTO_ICMPV6, 0));
690 		if (__skb_checksum_complete(skb)) {
691 			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
692 				       saddr, daddr);
693 			goto discard_it;
694 		}
695 	}
696 
697 	if (!pskb_pull(skb, sizeof(*hdr)))
698 		goto discard_it;
699 
700 	hdr = icmp6_hdr(skb);
701 
702 	type = hdr->icmp6_type;
703 
704 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
705 
706 	switch (type) {
707 	case ICMPV6_ECHO_REQUEST:
708 		icmpv6_echo_reply(skb);
709 		break;
710 
711 	case ICMPV6_ECHO_REPLY:
712 		/* we couldn't care less */
713 		break;
714 
715 	case ICMPV6_PKT_TOOBIG:
716 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
717 		   standard destination cache. Seems, only "advanced"
718 		   destination cache will allow to solve this problem
719 		   --ANK (980726)
720 		 */
721 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
722 			goto discard_it;
723 		hdr = icmp6_hdr(skb);
724 		orig_hdr = (struct ipv6hdr *) (hdr + 1);
725 		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
726 				   ntohl(hdr->icmp6_mtu));
727 
728 		/*
729 		 *	Drop through to notify
730 		 */
731 
732 	case ICMPV6_DEST_UNREACH:
733 	case ICMPV6_TIME_EXCEED:
734 	case ICMPV6_PARAMPROB:
735 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
736 		break;
737 
738 	case NDISC_ROUTER_SOLICITATION:
739 	case NDISC_ROUTER_ADVERTISEMENT:
740 	case NDISC_NEIGHBOUR_SOLICITATION:
741 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
742 	case NDISC_REDIRECT:
743 		ndisc_rcv(skb);
744 		break;
745 
746 	case ICMPV6_MGM_QUERY:
747 		igmp6_event_query(skb);
748 		break;
749 
750 	case ICMPV6_MGM_REPORT:
751 		igmp6_event_report(skb);
752 		break;
753 
754 	case ICMPV6_MGM_REDUCTION:
755 	case ICMPV6_NI_QUERY:
756 	case ICMPV6_NI_REPLY:
757 	case ICMPV6_MLD2_REPORT:
758 	case ICMPV6_DHAAD_REQUEST:
759 	case ICMPV6_DHAAD_REPLY:
760 	case ICMPV6_MOBILE_PREFIX_SOL:
761 	case ICMPV6_MOBILE_PREFIX_ADV:
762 		break;
763 
764 	default:
765 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
766 
767 		/* informational */
768 		if (type & ICMPV6_INFOMSG_MASK)
769 			break;
770 
771 		/*
772 		 * error of unknown type.
773 		 * must pass to upper level
774 		 */
775 
776 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
777 	}
778 
779 	kfree_skb(skb);
780 	return 0;
781 
782 discard_it:
783 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
784 drop_no_count:
785 	kfree_skb(skb);
786 	return 0;
787 }
788 
789 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
790 		      u8 type,
791 		      const struct in6_addr *saddr,
792 		      const struct in6_addr *daddr,
793 		      int oif)
794 {
795 	memset(fl6, 0, sizeof(*fl6));
796 	fl6->saddr = *saddr;
797 	fl6->daddr = *daddr;
798 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
799 	fl6->fl6_icmp_type	= type;
800 	fl6->fl6_icmp_code	= 0;
801 	fl6->flowi6_oif		= oif;
802 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
803 }
804 
805 /*
806  * Special lock-class for __icmpv6_sk:
807  */
808 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
809 
810 static int __net_init icmpv6_sk_init(struct net *net)
811 {
812 	struct sock *sk;
813 	int err, i, j;
814 
815 	net->ipv6.icmp_sk =
816 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
817 	if (net->ipv6.icmp_sk == NULL)
818 		return -ENOMEM;
819 
820 	for_each_possible_cpu(i) {
821 		err = inet_ctl_sock_create(&sk, PF_INET6,
822 					   SOCK_RAW, IPPROTO_ICMPV6, net);
823 		if (err < 0) {
824 			printk(KERN_ERR
825 			       "Failed to initialize the ICMP6 control socket "
826 			       "(err %d).\n",
827 			       err);
828 			goto fail;
829 		}
830 
831 		net->ipv6.icmp_sk[i] = sk;
832 
833 		/*
834 		 * Split off their lock-class, because sk->sk_dst_lock
835 		 * gets used from softirqs, which is safe for
836 		 * __icmpv6_sk (because those never get directly used
837 		 * via userspace syscalls), but unsafe for normal sockets.
838 		 */
839 		lockdep_set_class(&sk->sk_dst_lock,
840 				  &icmpv6_socket_sk_dst_lock_key);
841 
842 		/* Enough space for 2 64K ICMP packets, including
843 		 * sk_buff struct overhead.
844 		 */
845 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
846 	}
847 	return 0;
848 
849  fail:
850 	for (j = 0; j < i; j++)
851 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
852 	kfree(net->ipv6.icmp_sk);
853 	return err;
854 }
855 
856 static void __net_exit icmpv6_sk_exit(struct net *net)
857 {
858 	int i;
859 
860 	for_each_possible_cpu(i) {
861 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
862 	}
863 	kfree(net->ipv6.icmp_sk);
864 }
865 
866 static struct pernet_operations icmpv6_sk_ops = {
867        .init = icmpv6_sk_init,
868        .exit = icmpv6_sk_exit,
869 };
870 
871 int __init icmpv6_init(void)
872 {
873 	int err;
874 
875 	err = register_pernet_subsys(&icmpv6_sk_ops);
876 	if (err < 0)
877 		return err;
878 
879 	err = -EAGAIN;
880 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
881 		goto fail;
882 	return 0;
883 
884 fail:
885 	printk(KERN_ERR "Failed to register ICMP6 protocol\n");
886 	unregister_pernet_subsys(&icmpv6_sk_ops);
887 	return err;
888 }
889 
890 void icmpv6_cleanup(void)
891 {
892 	unregister_pernet_subsys(&icmpv6_sk_ops);
893 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
894 }
895 
896 
/*
 * Destination Unreachable codes mapped to an errno and a "fatal"
 * flag.  The table is indexed by ICMPv6 code; icmpv6_err_convert()
 * consults it for codes up to ICMPV6_PORT_UNREACH.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
};
922 
923 int icmpv6_err_convert(u8 type, u8 code, int *err)
924 {
925 	int fatal = 0;
926 
927 	*err = EPROTO;
928 
929 	switch (type) {
930 	case ICMPV6_DEST_UNREACH:
931 		fatal = 1;
932 		if (code <= ICMPV6_PORT_UNREACH) {
933 			*err  = tab_unreach[code].err;
934 			fatal = tab_unreach[code].fatal;
935 		}
936 		break;
937 
938 	case ICMPV6_PKT_TOOBIG:
939 		*err = EMSGSIZE;
940 		break;
941 
942 	case ICMPV6_PARAMPROB:
943 		*err = EPROTO;
944 		fatal = 1;
945 		break;
946 
947 	case ICMPV6_TIME_EXCEED:
948 		*err = EHOSTUNREACH;
949 		break;
950 	}
951 
952 	return fatal;
953 }
954 
955 EXPORT_SYMBOL(icmpv6_err_convert);
956 
#ifdef CONFIG_SYSCTL
/*
 * Template for the ICMPv6 sysctl table.  Its single entry,
 * "ratelimit", refers to init_net's icmpv6_time here and is handled
 * by proc_dointvec_ms_jiffies.
 */
ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.mode		= 0644,
		.maxlen		= sizeof(int),
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },
};

/*
 * Duplicate the template for @net and point the "ratelimit" entry at
 * that namespace's own icmpv6_time.  Returns the copy, or NULL when
 * the allocation fails.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *copy;

	copy = kmemdup(ipv6_icmp_table_template,
		       sizeof(ipv6_icmp_table_template),
		       GFP_KERNEL);
	if (!copy)
		return NULL;

	copy[0].data = &net->ipv6.sysctl.icmpv6_time;
	return copy;
}
#endif
983 
984