xref: /openbmc/linux/net/ipv6/icmp.c (revision 9ffc93f2)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen		:	add rate limits; never reply to an ICMP error.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure that a parameter problem message
25  *					is sent for fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #include <linux/module.h>
33 #include <linux/errno.h>
34 #include <linux/types.h>
35 #include <linux/socket.h>
36 #include <linux/in.h>
37 #include <linux/kernel.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/skbuff.h>
41 #include <linux/init.h>
42 #include <linux/netfilter.h>
43 #include <linux/slab.h>
44 
45 #ifdef CONFIG_SYSCTL
46 #include <linux/sysctl.h>
47 #endif
48 
49 #include <linux/inet.h>
50 #include <linux/netdevice.h>
51 #include <linux/icmpv6.h>
52 
53 #include <net/ip.h>
54 #include <net/sock.h>
55 
56 #include <net/ipv6.h>
57 #include <net/ip6_checksum.h>
58 #include <net/protocol.h>
59 #include <net/raw.h>
60 #include <net/rawv6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 
68 #include <asm/uaccess.h>
69 
70 /*
71  *	The ICMP socket(s). This is the most convenient way to flow control
72  *	our ICMP output as well as maintain a clean interface throughout
73  *	all layers. All Socketless IP sends will soon be gone.
74  *
75  *	On SMP we have one ICMP socket per-cpu.
76  */
77 static inline struct sock *icmpv6_sk(struct net *net)
78 {
79 	return net->ipv6.icmp_sk[smp_processor_id()];
80 }
81 
82 static int icmpv6_rcv(struct sk_buff *skb);
83 
84 static const struct inet6_protocol icmpv6_protocol = {
85 	.handler	=	icmpv6_rcv,
86 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
87 };
88 
89 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
90 {
91 	struct sock *sk;
92 
93 	local_bh_disable();
94 
95 	sk = icmpv6_sk(net);
96 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
97 		/* This can happen if the output path (f.e. SIT or
98 		 * ip6ip6 tunnel) signals dst_link_failure() for an
99 		 * outgoing ICMP6 packet.
100 		 */
101 		local_bh_enable();
102 		return NULL;
103 	}
104 	return sk;
105 }
106 
107 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
108 {
109 	spin_unlock_bh(&sk->sk_lock.slock);
110 }
111 
112 /*
113  * Slightly more convenient version of icmpv6_send.
114  */
115 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
116 {
117 	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
118 	kfree_skb(skb);
119 }
120 
121 /*
122  * Figure out, may we reply to this packet with icmp error.
123  *
124  * We do not reply, if:
125  *	- it was icmp error message.
126  *	- it is truncated, so that it is known, that protocol is ICMPV6
127  *	  (i.e. in the middle of some exthdr)
128  *
129  *	--ANK (980726)
130  */
131 
132 static int is_ineligible(struct sk_buff *skb)
133 {
134 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
135 	int len = skb->len - ptr;
136 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
137 	__be16 frag_off;
138 
139 	if (len < 0)
140 		return 1;
141 
142 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
143 	if (ptr < 0)
144 		return 0;
145 	if (nexthdr == IPPROTO_ICMPV6) {
146 		u8 _type, *tp;
147 		tp = skb_header_pointer(skb,
148 			ptr+offsetof(struct icmp6hdr, icmp6_type),
149 			sizeof(_type), &_type);
150 		if (tp == NULL ||
151 		    !(*tp & ICMPV6_INFOMSG_MASK))
152 			return 1;
153 	}
154 	return 0;
155 }
156 
157 /*
158  * Check the ICMP output rate limit
159  */
160 static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
161 				      struct flowi6 *fl6)
162 {
163 	struct dst_entry *dst;
164 	struct net *net = sock_net(sk);
165 	bool res = false;
166 
167 	/* Informational messages are not limited. */
168 	if (type & ICMPV6_INFOMSG_MASK)
169 		return true;
170 
171 	/* Do not limit pmtu discovery, it would break it. */
172 	if (type == ICMPV6_PKT_TOOBIG)
173 		return true;
174 
175 	/*
176 	 * Look up the output route.
177 	 * XXX: perhaps the expire for routing entries cloned by
178 	 * this lookup should be more aggressive (not longer than timeout).
179 	 */
180 	dst = ip6_route_output(net, sk, fl6);
181 	if (dst->error) {
182 		IP6_INC_STATS(net, ip6_dst_idev(dst),
183 			      IPSTATS_MIB_OUTNOROUTES);
184 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
185 		res = true;
186 	} else {
187 		struct rt6_info *rt = (struct rt6_info *)dst;
188 		int tmo = net->ipv6.sysctl.icmpv6_time;
189 
190 		/* Give more bandwidth to wider prefixes. */
191 		if (rt->rt6i_dst.plen < 128)
192 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
193 
194 		if (!rt->rt6i_peer)
195 			rt6_bind_peer(rt, 1);
196 		res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
197 	}
198 	dst_release(dst);
199 	return res;
200 }
201 
202 /*
203  *	an inline helper for the "simple" if statement below
204  *	checks if parameter problem report is caused by an
205  *	unrecognized IPv6 option that has the Option Type
206  *	highest-order two bits set to 10
207  */
208 
209 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
210 {
211 	u8 _optval, *op;
212 
213 	offset += skb_network_offset(skb);
214 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
215 	if (op == NULL)
216 		return 1;
217 	return (*op & 0xC0) == 0x80;
218 }
219 
220 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
221 {
222 	struct sk_buff *skb;
223 	struct icmp6hdr *icmp6h;
224 	int err = 0;
225 
226 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
227 		goto out;
228 
229 	icmp6h = icmp6_hdr(skb);
230 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
231 	icmp6h->icmp6_cksum = 0;
232 
233 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
234 		skb->csum = csum_partial(icmp6h,
235 					sizeof(struct icmp6hdr), skb->csum);
236 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
237 						      &fl6->daddr,
238 						      len, fl6->flowi6_proto,
239 						      skb->csum);
240 	} else {
241 		__wsum tmp_csum = 0;
242 
243 		skb_queue_walk(&sk->sk_write_queue, skb) {
244 			tmp_csum = csum_add(tmp_csum, skb->csum);
245 		}
246 
247 		tmp_csum = csum_partial(icmp6h,
248 					sizeof(struct icmp6hdr), tmp_csum);
249 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
250 						      &fl6->daddr,
251 						      len, fl6->flowi6_proto,
252 						      tmp_csum);
253 	}
254 	ip6_push_pending_frames(sk);
255 out:
256 	return err;
257 }
258 
259 struct icmpv6_msg {
260 	struct sk_buff	*skb;
261 	int		offset;
262 	uint8_t		type;
263 };
264 
265 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
266 {
267 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
268 	struct sk_buff *org_skb = msg->skb;
269 	__wsum csum = 0;
270 
271 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
272 				      to, len, csum);
273 	skb->csum = csum_block_add(skb->csum, csum, odd);
274 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
275 		nf_ct_attach(skb, org_skb);
276 	return 0;
277 }
278 
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
/*
 * If the packet carries a Home Address destination option, exchange
 * the address stored in that option with the IPv6 source address.
 * Packets without such an option are left untouched.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr saved;
	int off;

	if (!opt->dsthao)
		return;

	off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
	if (unlikely(off < 0))
		return;

	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + off);

	saved = iph->saddr;
	iph->saddr = hao->addr;
	hao->addr = saved;
}
#else
/* Without Mobile IPv6 support there is nothing to swap. */
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
302 
303 static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
304 					     struct sock *sk, struct flowi6 *fl6)
305 {
306 	struct dst_entry *dst, *dst2;
307 	struct flowi6 fl2;
308 	int err;
309 
310 	err = ip6_dst_lookup(sk, &dst, fl6);
311 	if (err)
312 		return ERR_PTR(err);
313 
314 	/*
315 	 * We won't send icmp if the destination is known
316 	 * anycast.
317 	 */
318 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
319 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
320 		dst_release(dst);
321 		return ERR_PTR(-EINVAL);
322 	}
323 
324 	/* No need to clone since we're just using its address. */
325 	dst2 = dst;
326 
327 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
328 	if (!IS_ERR(dst)) {
329 		if (dst != dst2)
330 			return dst;
331 	} else {
332 		if (PTR_ERR(dst) == -EPERM)
333 			dst = NULL;
334 		else
335 			return dst;
336 	}
337 
338 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
339 	if (err)
340 		goto relookup_failed;
341 
342 	err = ip6_dst_lookup(sk, &dst2, &fl2);
343 	if (err)
344 		goto relookup_failed;
345 
346 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
347 	if (!IS_ERR(dst2)) {
348 		dst_release(dst);
349 		dst = dst2;
350 	} else {
351 		err = PTR_ERR(dst2);
352 		if (err == -EPERM) {
353 			dst_release(dst);
354 			return dst2;
355 		} else
356 			goto relookup_failed;
357 	}
358 
359 relookup_failed:
360 	if (dst)
361 		return dst;
362 	return ERR_PTR(err);
363 }
364 
365 /*
366  *	Send an ICMP message in response to a packet in error
367  */
368 void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
369 {
370 	struct net *net = dev_net(skb->dev);
371 	struct inet6_dev *idev = NULL;
372 	struct ipv6hdr *hdr = ipv6_hdr(skb);
373 	struct sock *sk;
374 	struct ipv6_pinfo *np;
375 	const struct in6_addr *saddr = NULL;
376 	struct dst_entry *dst;
377 	struct icmp6hdr tmp_hdr;
378 	struct flowi6 fl6;
379 	struct icmpv6_msg msg;
380 	int iif = 0;
381 	int addr_type = 0;
382 	int len;
383 	int hlimit;
384 	int err = 0;
385 
386 	if ((u8 *)hdr < skb->head ||
387 	    (skb->network_header + sizeof(*hdr)) > skb->tail)
388 		return;
389 
390 	/*
391 	 *	Make sure we respect the rules
392 	 *	i.e. RFC 1885 2.4(e)
393 	 *	Rule (e.1) is enforced by not using icmpv6_send
394 	 *	in any code that processes icmp errors.
395 	 */
396 	addr_type = ipv6_addr_type(&hdr->daddr);
397 
398 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
399 		saddr = &hdr->daddr;
400 
401 	/*
402 	 *	Dest addr check
403 	 */
404 
405 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
406 		if (type != ICMPV6_PKT_TOOBIG &&
407 		    !(type == ICMPV6_PARAMPROB &&
408 		      code == ICMPV6_UNK_OPTION &&
409 		      (opt_unrec(skb, info))))
410 			return;
411 
412 		saddr = NULL;
413 	}
414 
415 	addr_type = ipv6_addr_type(&hdr->saddr);
416 
417 	/*
418 	 *	Source addr check
419 	 */
420 
421 	if (addr_type & IPV6_ADDR_LINKLOCAL)
422 		iif = skb->dev->ifindex;
423 
424 	/*
425 	 *	Must not send error if the source does not uniquely
426 	 *	identify a single node (RFC2463 Section 2.4).
427 	 *	We check unspecified / multicast addresses here,
428 	 *	and anycast addresses will be checked later.
429 	 */
430 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
431 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
432 		return;
433 	}
434 
435 	/*
436 	 *	Never answer to a ICMP packet.
437 	 */
438 	if (is_ineligible(skb)) {
439 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
440 		return;
441 	}
442 
443 	mip6_addr_swap(skb);
444 
445 	memset(&fl6, 0, sizeof(fl6));
446 	fl6.flowi6_proto = IPPROTO_ICMPV6;
447 	fl6.daddr = hdr->saddr;
448 	if (saddr)
449 		fl6.saddr = *saddr;
450 	fl6.flowi6_oif = iif;
451 	fl6.fl6_icmp_type = type;
452 	fl6.fl6_icmp_code = code;
453 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
454 
455 	sk = icmpv6_xmit_lock(net);
456 	if (sk == NULL)
457 		return;
458 	np = inet6_sk(sk);
459 
460 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
461 		goto out;
462 
463 	tmp_hdr.icmp6_type = type;
464 	tmp_hdr.icmp6_code = code;
465 	tmp_hdr.icmp6_cksum = 0;
466 	tmp_hdr.icmp6_pointer = htonl(info);
467 
468 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
469 		fl6.flowi6_oif = np->mcast_oif;
470 	else if (!fl6.flowi6_oif)
471 		fl6.flowi6_oif = np->ucast_oif;
472 
473 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
474 	if (IS_ERR(dst))
475 		goto out;
476 
477 	if (ipv6_addr_is_multicast(&fl6.daddr))
478 		hlimit = np->mcast_hops;
479 	else
480 		hlimit = np->hop_limit;
481 	if (hlimit < 0)
482 		hlimit = ip6_dst_hoplimit(dst);
483 
484 	msg.skb = skb;
485 	msg.offset = skb_network_offset(skb);
486 	msg.type = type;
487 
488 	len = skb->len - msg.offset;
489 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
490 	if (len < 0) {
491 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
492 		goto out_dst_release;
493 	}
494 
495 	rcu_read_lock();
496 	idev = __in6_dev_get(skb->dev);
497 
498 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
499 			      len + sizeof(struct icmp6hdr),
500 			      sizeof(struct icmp6hdr), hlimit,
501 			      np->tclass, NULL, &fl6, (struct rt6_info*)dst,
502 			      MSG_DONTWAIT, np->dontfrag);
503 	if (err) {
504 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
505 		ip6_flush_pending_frames(sk);
506 	} else {
507 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
508 						 len + sizeof(struct icmp6hdr));
509 	}
510 	rcu_read_unlock();
511 out_dst_release:
512 	dst_release(dst);
513 out:
514 	icmpv6_xmit_unlock(sk);
515 }
516 EXPORT_SYMBOL(icmpv6_send);
517 
518 static void icmpv6_echo_reply(struct sk_buff *skb)
519 {
520 	struct net *net = dev_net(skb->dev);
521 	struct sock *sk;
522 	struct inet6_dev *idev;
523 	struct ipv6_pinfo *np;
524 	const struct in6_addr *saddr = NULL;
525 	struct icmp6hdr *icmph = icmp6_hdr(skb);
526 	struct icmp6hdr tmp_hdr;
527 	struct flowi6 fl6;
528 	struct icmpv6_msg msg;
529 	struct dst_entry *dst;
530 	int err = 0;
531 	int hlimit;
532 
533 	saddr = &ipv6_hdr(skb)->daddr;
534 
535 	if (!ipv6_unicast_destination(skb))
536 		saddr = NULL;
537 
538 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
539 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
540 
541 	memset(&fl6, 0, sizeof(fl6));
542 	fl6.flowi6_proto = IPPROTO_ICMPV6;
543 	fl6.daddr = ipv6_hdr(skb)->saddr;
544 	if (saddr)
545 		fl6.saddr = *saddr;
546 	fl6.flowi6_oif = skb->dev->ifindex;
547 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
548 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
549 
550 	sk = icmpv6_xmit_lock(net);
551 	if (sk == NULL)
552 		return;
553 	np = inet6_sk(sk);
554 
555 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
556 		fl6.flowi6_oif = np->mcast_oif;
557 	else if (!fl6.flowi6_oif)
558 		fl6.flowi6_oif = np->ucast_oif;
559 
560 	err = ip6_dst_lookup(sk, &dst, &fl6);
561 	if (err)
562 		goto out;
563 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
564 	if (IS_ERR(dst))
565 		goto out;
566 
567 	if (ipv6_addr_is_multicast(&fl6.daddr))
568 		hlimit = np->mcast_hops;
569 	else
570 		hlimit = np->hop_limit;
571 	if (hlimit < 0)
572 		hlimit = ip6_dst_hoplimit(dst);
573 
574 	idev = __in6_dev_get(skb->dev);
575 
576 	msg.skb = skb;
577 	msg.offset = 0;
578 	msg.type = ICMPV6_ECHO_REPLY;
579 
580 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
581 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
582 				(struct rt6_info*)dst, MSG_DONTWAIT,
583 				np->dontfrag);
584 
585 	if (err) {
586 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
587 		ip6_flush_pending_frames(sk);
588 	} else {
589 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
590 						 skb->len + sizeof(struct icmp6hdr));
591 	}
592 	dst_release(dst);
593 out:
594 	icmpv6_xmit_unlock(sk);
595 }
596 
597 static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
598 {
599 	const struct inet6_protocol *ipprot;
600 	int inner_offset;
601 	int hash;
602 	u8 nexthdr;
603 	__be16 frag_off;
604 
605 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
606 		return;
607 
608 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
609 	if (ipv6_ext_hdr(nexthdr)) {
610 		/* now skip over extension headers */
611 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
612 						&nexthdr, &frag_off);
613 		if (inner_offset<0)
614 			return;
615 	} else {
616 		inner_offset = sizeof(struct ipv6hdr);
617 	}
618 
619 	/* Checkin header including 8 bytes of inner protocol header. */
620 	if (!pskb_may_pull(skb, inner_offset+8))
621 		return;
622 
623 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
624 	   Without this we will not able f.e. to make source routed
625 	   pmtu discovery.
626 	   Corresponding argument (opt) to notifiers is already added.
627 	   --ANK (980726)
628 	 */
629 
630 	hash = nexthdr & (MAX_INET_PROTOS - 1);
631 
632 	rcu_read_lock();
633 	ipprot = rcu_dereference(inet6_protos[hash]);
634 	if (ipprot && ipprot->err_handler)
635 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
636 	rcu_read_unlock();
637 
638 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
639 }
640 
641 /*
642  *	Handle icmp messages
643  */
644 
645 static int icmpv6_rcv(struct sk_buff *skb)
646 {
647 	struct net_device *dev = skb->dev;
648 	struct inet6_dev *idev = __in6_dev_get(dev);
649 	const struct in6_addr *saddr, *daddr;
650 	const struct ipv6hdr *orig_hdr;
651 	struct icmp6hdr *hdr;
652 	u8 type;
653 
654 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
655 		struct sec_path *sp = skb_sec_path(skb);
656 		int nh;
657 
658 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
659 				 XFRM_STATE_ICMP))
660 			goto drop_no_count;
661 
662 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
663 			goto drop_no_count;
664 
665 		nh = skb_network_offset(skb);
666 		skb_set_network_header(skb, sizeof(*hdr));
667 
668 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
669 			goto drop_no_count;
670 
671 		skb_set_network_header(skb, nh);
672 	}
673 
674 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
675 
676 	saddr = &ipv6_hdr(skb)->saddr;
677 	daddr = &ipv6_hdr(skb)->daddr;
678 
679 	/* Perform checksum. */
680 	switch (skb->ip_summed) {
681 	case CHECKSUM_COMPLETE:
682 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
683 				     skb->csum))
684 			break;
685 		/* fall through */
686 	case CHECKSUM_NONE:
687 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
688 					     IPPROTO_ICMPV6, 0));
689 		if (__skb_checksum_complete(skb)) {
690 			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
691 				       saddr, daddr);
692 			goto discard_it;
693 		}
694 	}
695 
696 	if (!pskb_pull(skb, sizeof(*hdr)))
697 		goto discard_it;
698 
699 	hdr = icmp6_hdr(skb);
700 
701 	type = hdr->icmp6_type;
702 
703 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
704 
705 	switch (type) {
706 	case ICMPV6_ECHO_REQUEST:
707 		icmpv6_echo_reply(skb);
708 		break;
709 
710 	case ICMPV6_ECHO_REPLY:
711 		/* we couldn't care less */
712 		break;
713 
714 	case ICMPV6_PKT_TOOBIG:
715 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
716 		   standard destination cache. Seems, only "advanced"
717 		   destination cache will allow to solve this problem
718 		   --ANK (980726)
719 		 */
720 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
721 			goto discard_it;
722 		hdr = icmp6_hdr(skb);
723 		orig_hdr = (struct ipv6hdr *) (hdr + 1);
724 		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
725 				   ntohl(hdr->icmp6_mtu));
726 
727 		/*
728 		 *	Drop through to notify
729 		 */
730 
731 	case ICMPV6_DEST_UNREACH:
732 	case ICMPV6_TIME_EXCEED:
733 	case ICMPV6_PARAMPROB:
734 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
735 		break;
736 
737 	case NDISC_ROUTER_SOLICITATION:
738 	case NDISC_ROUTER_ADVERTISEMENT:
739 	case NDISC_NEIGHBOUR_SOLICITATION:
740 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
741 	case NDISC_REDIRECT:
742 		ndisc_rcv(skb);
743 		break;
744 
745 	case ICMPV6_MGM_QUERY:
746 		igmp6_event_query(skb);
747 		break;
748 
749 	case ICMPV6_MGM_REPORT:
750 		igmp6_event_report(skb);
751 		break;
752 
753 	case ICMPV6_MGM_REDUCTION:
754 	case ICMPV6_NI_QUERY:
755 	case ICMPV6_NI_REPLY:
756 	case ICMPV6_MLD2_REPORT:
757 	case ICMPV6_DHAAD_REQUEST:
758 	case ICMPV6_DHAAD_REPLY:
759 	case ICMPV6_MOBILE_PREFIX_SOL:
760 	case ICMPV6_MOBILE_PREFIX_ADV:
761 		break;
762 
763 	default:
764 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
765 
766 		/* informational */
767 		if (type & ICMPV6_INFOMSG_MASK)
768 			break;
769 
770 		/*
771 		 * error of unknown type.
772 		 * must pass to upper level
773 		 */
774 
775 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
776 	}
777 
778 	kfree_skb(skb);
779 	return 0;
780 
781 discard_it:
782 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
783 drop_no_count:
784 	kfree_skb(skb);
785 	return 0;
786 }
787 
788 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
789 		      u8 type,
790 		      const struct in6_addr *saddr,
791 		      const struct in6_addr *daddr,
792 		      int oif)
793 {
794 	memset(fl6, 0, sizeof(*fl6));
795 	fl6->saddr = *saddr;
796 	fl6->daddr = *daddr;
797 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
798 	fl6->fl6_icmp_type	= type;
799 	fl6->fl6_icmp_code	= 0;
800 	fl6->flowi6_oif		= oif;
801 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
802 }
803 
804 /*
805  * Special lock-class for __icmpv6_sk:
806  */
807 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
808 
809 static int __net_init icmpv6_sk_init(struct net *net)
810 {
811 	struct sock *sk;
812 	int err, i, j;
813 
814 	net->ipv6.icmp_sk =
815 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
816 	if (net->ipv6.icmp_sk == NULL)
817 		return -ENOMEM;
818 
819 	for_each_possible_cpu(i) {
820 		err = inet_ctl_sock_create(&sk, PF_INET6,
821 					   SOCK_RAW, IPPROTO_ICMPV6, net);
822 		if (err < 0) {
823 			printk(KERN_ERR
824 			       "Failed to initialize the ICMP6 control socket "
825 			       "(err %d).\n",
826 			       err);
827 			goto fail;
828 		}
829 
830 		net->ipv6.icmp_sk[i] = sk;
831 
832 		/*
833 		 * Split off their lock-class, because sk->sk_dst_lock
834 		 * gets used from softirqs, which is safe for
835 		 * __icmpv6_sk (because those never get directly used
836 		 * via userspace syscalls), but unsafe for normal sockets.
837 		 */
838 		lockdep_set_class(&sk->sk_dst_lock,
839 				  &icmpv6_socket_sk_dst_lock_key);
840 
841 		/* Enough space for 2 64K ICMP packets, including
842 		 * sk_buff struct overhead.
843 		 */
844 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
845 	}
846 	return 0;
847 
848  fail:
849 	for (j = 0; j < i; j++)
850 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
851 	kfree(net->ipv6.icmp_sk);
852 	return err;
853 }
854 
855 static void __net_exit icmpv6_sk_exit(struct net *net)
856 {
857 	int i;
858 
859 	for_each_possible_cpu(i) {
860 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
861 	}
862 	kfree(net->ipv6.icmp_sk);
863 }
864 
865 static struct pernet_operations icmpv6_sk_ops = {
866        .init = icmpv6_sk_init,
867        .exit = icmpv6_sk_exit,
868 };
869 
870 int __init icmpv6_init(void)
871 {
872 	int err;
873 
874 	err = register_pernet_subsys(&icmpv6_sk_ops);
875 	if (err < 0)
876 		return err;
877 
878 	err = -EAGAIN;
879 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
880 		goto fail;
881 	return 0;
882 
883 fail:
884 	printk(KERN_ERR "Failed to register ICMP6 protocol\n");
885 	unregister_pernet_subsys(&icmpv6_sk_ops);
886 	return err;
887 }
888 
889 void icmpv6_cleanup(void)
890 {
891 	unregister_pernet_subsys(&icmpv6_sk_ops);
892 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
893 }
894 
895 
/*
 * Destination Unreachable codes mapped to errno values, indexed by the
 * ICMPv6 code.  "fatal" is the value icmpv6_err_convert() returns for
 * that code.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
};
921 
922 int icmpv6_err_convert(u8 type, u8 code, int *err)
923 {
924 	int fatal = 0;
925 
926 	*err = EPROTO;
927 
928 	switch (type) {
929 	case ICMPV6_DEST_UNREACH:
930 		fatal = 1;
931 		if (code <= ICMPV6_PORT_UNREACH) {
932 			*err  = tab_unreach[code].err;
933 			fatal = tab_unreach[code].fatal;
934 		}
935 		break;
936 
937 	case ICMPV6_PKT_TOOBIG:
938 		*err = EMSGSIZE;
939 		break;
940 
941 	case ICMPV6_PARAMPROB:
942 		*err = EPROTO;
943 		fatal = 1;
944 		break;
945 
946 	case ICMPV6_TIME_EXCEED:
947 		*err = EHOSTUNREACH;
948 		break;
949 	}
950 
951 	return fatal;
952 }
953 
954 EXPORT_SYMBOL(icmpv6_err_convert);
955 
#ifdef CONFIG_SYSCTL
/*
 * Template for the ICMPv6 sysctl table.  Its single entry,
 * "ratelimit", refers to init_net's icmpv6_time value here; the
 * per-namespace copy made below is redirected to its own namespace.
 */
ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },
};

/*
 * Duplicate the template for a network namespace and point the
 * "ratelimit" entry at that namespace's icmpv6_time.
 *
 * Returns the new table, or NULL if the allocation failed.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table = kmemdup(ipv6_icmp_table_template,
					  sizeof(ipv6_icmp_table_template),
					  GFP_KERNEL);

	if (table == NULL)
		return NULL;

	table[0].data = &net->ipv6.sysctl.icmpv6_time;
	return table;
}
#endif
982 
983