xref: /openbmc/linux/net/ipv6/icmp.c (revision ec35b61e)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. Never reply to an icmp.
23  *					Add more length checks and other fixes.
24  *	yoshfuji		:	ensure parameter problem messages are
25  *					sent for fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 
72 #include <asm/uaccess.h>
73 
74 /*
75  *	The ICMP socket(s). This is the most convenient way to flow-control
76  *	our ICMP output as well as to maintain a clean interface throughout
77  *	all layers. All socketless IP sends will soon be gone.
78  *
79  *	On SMP we have one ICMP socket per-cpu.
80  */
81 static inline struct sock *icmpv6_sk(struct net *net)
82 {
83 	return net->ipv6.icmp_sk[smp_processor_id()];
84 }
85 
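/*
 * err_handler for ICMPv6 itself: runs when a received ICMPv6 error quotes
 * one of our own ICMPv6 packets.  'info' carries the reported MTU for
 * Packet Too Big; errors quoting an echo request are handed to the ping
 * socket code.
 */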
86 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
87 		       u8 type, u8 code, int offset, __be32 info)
88 {
89 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
90 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
91 	struct net *net = dev_net(skb->dev);
92 
93 	if (type == ICMPV6_PKT_TOOBIG)
94 		ip6_update_pmtu(skb, net, info, 0, 0);
95 	else if (type == NDISC_REDIRECT)
96 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
97 
98 	if (!(type & ICMPV6_INFOMSG_MASK))
99 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
100 			ping_err(skb, offset, info);
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
112 {
113 	struct sock *sk;
114 
115 	local_bh_disable();
116 
117 	sk = icmpv6_sk(net);
118 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
119 		/* This can happen if the output path (e.g. SIT or
120 		 * ip6ip6 tunnel) signals dst_link_failure() for an
121 		 * outgoing ICMP6 packet.
122 		 */
123 		local_bh_enable();
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
129 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
130 {
131 	spin_unlock_bh(&sk->sk_lock.slock);
132 }
133 
134 /*
135  * Figure out whether we may reply to this packet with an icmp error.
136  *
137  * We do not reply if:
138  *	- it was an icmp error message.
139  *	- it is truncated, so that it is not known whether the protocol is
140  *	  ICMPV6 (i.e. it ends in the middle of some exthdr).
141  *
142  *	--ANK (980726)
143  */
144 
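/*
 * Note that informational ICMPv6 types have the high bit of the type set
 * (type >= 128), which is what ICMPV6_INFOMSG_MASK tests below; only an
 * embedded ICMPv6 error makes the packet ineligible for a reply.
 */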
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 	int len = skb->len - ptr;
149 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 	__be16 frag_off;
151 
152 	if (len < 0)
153 		return true;
154 
155 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 	if (ptr < 0)
157 		return false;
158 	if (nexthdr == IPPROTO_ICMPV6) {
159 		u8 _type, *tp;
160 		tp = skb_header_pointer(skb,
161 			ptr+offsetof(struct icmp6hdr, icmp6_type),
162 			sizeof(_type), &_type);
163 		if (tp == NULL ||
164 		    !(*tp & ICMPV6_INFOMSG_MASK))
165 			return true;
166 	}
167 	return false;
168 }
169 
170 /*
171  * Check the ICMP output rate limit
172  */
173 static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
174 				      struct flowi6 *fl6)
175 {
176 	struct dst_entry *dst;
177 	struct net *net = sock_net(sk);
178 	bool res = false;
179 
180 	/* Informational messages are not limited. */
181 	if (type & ICMPV6_INFOMSG_MASK)
182 		return true;
183 
184 	/* Do not limit pmtu discovery, it would break it. */
185 	if (type == ICMPV6_PKT_TOOBIG)
186 		return true;
187 
188 	/*
189 	 * Look up the output route.
190 	 * XXX: perhaps the expiry for routing entries cloned by
191 	 * this lookup should be more aggressive (no longer than the timeout).
192 	 */
193 	dst = ip6_route_output(net, sk, fl6);
194 	if (dst->error) {
195 		IP6_INC_STATS(net, ip6_dst_idev(dst),
196 			      IPSTATS_MIB_OUTNOROUTES);
197 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
198 		res = true;
199 	} else {
200 		struct rt6_info *rt = (struct rt6_info *)dst;
201 		int tmo = net->ipv6.sysctl.icmpv6_time;
202 		struct inet_peer *peer;
203 
204 		/* Give more bandwidth to wider prefixes. */
205 		if (rt->rt6i_dst.plen < 128)
206 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
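		/*
		 * For example, with the default 1000 ms ratelimit: a /128 or
		 * /127 route keeps the full interval, a /64 gets 1000 >> 2 =
		 * 250 ms and a default route gets 1000 >> 4 = 62 ms; the
		 * shift grows by one for every 32 bits the prefix shortens.
		 */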
207 
208 		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
209 		res = inet_peer_xrlim_allow(peer, tmo);
210 		if (peer)
211 			inet_putpeer(peer);
212 	}
213 	dst_release(dst);
214 	return res;
215 }
216 
217 /*
218  *	An inline helper for the "simple" if statement below:
219  *	checks whether a parameter problem report is caused by an
220  *	unrecognized IPv6 option whose Option Type has its
221  *	highest-order two bits set to 10.
222  */
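/*
 *	(Per RFC 2460 4.2, an Option Type whose two high-order bits are "10"
 *	requests a Parameter Problem message even when the offending packet
 *	was sent to a multicast address, which is why icmp6_send() consults
 *	this helper for non-unicast destinations.)
 */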
223 
224 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
225 {
226 	u8 _optval, *op;
227 
228 	offset += skb_network_offset(skb);
229 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
230 	if (op == NULL)
231 		return true;
232 	return (*op & 0xC0) == 0x80;
233 }
234 
235 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
236 			       struct icmp6hdr *thdr, int len)
237 {
238 	struct sk_buff *skb;
239 	struct icmp6hdr *icmp6h;
240 	int err = 0;
241 
242 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
243 		goto out;
244 
245 	icmp6h = icmp6_hdr(skb);
246 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
247 	icmp6h->icmp6_cksum = 0;
248 
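	/*
	 * Finish the ICMPv6 checksum: sum the queued fragments, fold in the
	 * ICMPv6 header itself, then add the IPv6 pseudo-header (source,
	 * destination, length, next header) via csum_ipv6_magic().
	 */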
249 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
250 		skb->csum = csum_partial(icmp6h,
251 					sizeof(struct icmp6hdr), skb->csum);
252 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
253 						      &fl6->daddr,
254 						      len, fl6->flowi6_proto,
255 						      skb->csum);
256 	} else {
257 		__wsum tmp_csum = 0;
258 
259 		skb_queue_walk(&sk->sk_write_queue, skb) {
260 			tmp_csum = csum_add(tmp_csum, skb->csum);
261 		}
262 
263 		tmp_csum = csum_partial(icmp6h,
264 					sizeof(struct icmp6hdr), tmp_csum);
265 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
266 						      &fl6->daddr,
267 						      len, fl6->flowi6_proto,
268 						      tmp_csum);
269 	}
270 	ip6_push_pending_frames(sk);
271 out:
272 	return err;
273 }
274 
275 struct icmpv6_msg {
276 	struct sk_buff	*skb;
277 	int		offset;
278 	uint8_t		type;
279 };
280 
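/*
 * ip6_append_data() callback: copy 'len' bytes of the offending packet,
 * starting at msg->offset + offset, into the fragment being built while
 * accumulating the checksum; for error messages, also tie the reply to the
 * original packet's conntrack entry.
 */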
281 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
282 {
283 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
284 	struct sk_buff *org_skb = msg->skb;
285 	__wsum csum = 0;
286 
287 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
288 				      to, len, csum);
289 	skb->csum = csum_block_add(skb->csum, csum, odd);
290 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
291 		nf_ct_attach(skb, org_skb);
292 	return 0;
293 }
294 
295 #if IS_ENABLED(CONFIG_IPV6_MIP6)
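/*
 * Mobile IPv6: if the offending packet carried a Home Address destination
 * option, swap the care-of address in the source field with the home
 * address, so the ICMP error is routed to the mobile node's home address
 * (see RFC 6275 for the exact rules this is meant to follow).
 */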
296 static void mip6_addr_swap(struct sk_buff *skb)
297 {
298 	struct ipv6hdr *iph = ipv6_hdr(skb);
299 	struct inet6_skb_parm *opt = IP6CB(skb);
300 	struct ipv6_destopt_hao *hao;
301 	struct in6_addr tmp;
302 	int off;
303 
304 	if (opt->dsthao) {
305 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
306 		if (likely(off >= 0)) {
307 			hao = (struct ipv6_destopt_hao *)
308 					(skb_network_header(skb) + off);
309 			tmp = iph->saddr;
310 			iph->saddr = hao->addr;
311 			hao->addr = tmp;
312 		}
313 	}
314 }
315 #else
316 static inline void mip6_addr_swap(struct sk_buff *skb) {}
317 #endif
318 
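/*
 * Route the outgoing ICMP error: do a normal route + xfrm lookup first;
 * if IPsec policy rejects that flow (-EPERM), retry with the flow decoded
 * in reverse from the offending packet and XFRM_LOOKUP_ICMP, so the error
 * can still follow a policy that matches the original traffic.
 */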
319 static struct dst_entry *icmpv6_route_lookup(struct net *net,
320 					     struct sk_buff *skb,
321 					     struct sock *sk,
322 					     struct flowi6 *fl6)
323 {
324 	struct dst_entry *dst, *dst2;
325 	struct flowi6 fl2;
326 	int err;
327 
328 	err = ip6_dst_lookup(sk, &dst, fl6);
329 	if (err)
330 		return ERR_PTR(err);
331 
332 	/*
333 	 * We won't send an icmp error if the destination is a known
334 	 * anycast address.
335 	 */
336 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
337 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
338 		dst_release(dst);
339 		return ERR_PTR(-EINVAL);
340 	}
341 
342 	/* No need to clone since we're just using its address. */
343 	dst2 = dst;
344 
345 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
346 	if (!IS_ERR(dst)) {
347 		if (dst != dst2)
348 			return dst;
349 	} else {
350 		if (PTR_ERR(dst) == -EPERM)
351 			dst = NULL;
352 		else
353 			return dst;
354 	}
355 
356 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
357 	if (err)
358 		goto relookup_failed;
359 
360 	err = ip6_dst_lookup(sk, &dst2, &fl2);
361 	if (err)
362 		goto relookup_failed;
363 
364 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
365 	if (!IS_ERR(dst2)) {
366 		dst_release(dst);
367 		dst = dst2;
368 	} else {
369 		err = PTR_ERR(dst2);
370 		if (err == -EPERM) {
371 			dst_release(dst);
372 			return dst2;
373 		} else
374 			goto relookup_failed;
375 	}
376 
377 relookup_failed:
378 	if (dst)
379 		return dst;
380 	return ERR_PTR(err);
381 }
382 
383 /*
384  *	Send an ICMP message in response to a packet in error
385  */
386 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
387 {
388 	struct net *net = dev_net(skb->dev);
389 	struct inet6_dev *idev = NULL;
390 	struct ipv6hdr *hdr = ipv6_hdr(skb);
391 	struct sock *sk;
392 	struct ipv6_pinfo *np;
393 	const struct in6_addr *saddr = NULL;
394 	struct dst_entry *dst;
395 	struct icmp6hdr tmp_hdr;
396 	struct flowi6 fl6;
397 	struct icmpv6_msg msg;
398 	int iif = 0;
399 	int addr_type = 0;
400 	int len;
401 	int hlimit;
402 	int err = 0;
403 
404 	if ((u8 *)hdr < skb->head ||
405 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
406 		return;
407 
408 	/*
409 	 *	Make sure we respect the rules,
410 	 *	i.e. RFC 1885 2.4(e).
411 	 *	Rule (e.1) is enforced by not using icmp6_send
412 	 *	in any code that processes icmp errors.
413 	 */
414 	addr_type = ipv6_addr_type(&hdr->daddr);
415 
416 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
417 		saddr = &hdr->daddr;
418 
419 	/*
420 	 *	Dest addr check
421 	 */
422 
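	/*
	 * Errors in response to packets that were not unicast to us are only
	 * allowed for Packet Too Big and for Parameter Problem about an
	 * unrecognized option with the "10" high-order bits (see opt_unrec()
	 * and the 2.4(e) rules cited above).
	 */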
423 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
424 		if (type != ICMPV6_PKT_TOOBIG &&
425 		    !(type == ICMPV6_PARAMPROB &&
426 		      code == ICMPV6_UNK_OPTION &&
427 		      (opt_unrec(skb, info))))
428 			return;
429 
430 		saddr = NULL;
431 	}
432 
433 	addr_type = ipv6_addr_type(&hdr->saddr);
434 
435 	/*
436 	 *	Source addr check
437 	 */
438 
439 	if (__ipv6_addr_needs_scope_id(addr_type))
440 		iif = skb->dev->ifindex;
441 
442 	/*
443 	 *	Must not send error if the source does not uniquely
444 	 *	identify a single node (RFC2463 Section 2.4).
445 	 *	We check unspecified / multicast addresses here,
446 	 *	and anycast addresses will be checked later.
447 	 */
448 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
449 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
450 		return;
451 	}
452 
453 	/*
454 	 *	Never answer an ICMP error message.
455 	 */
456 	if (is_ineligible(skb)) {
457 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
458 		return;
459 	}
460 
461 	mip6_addr_swap(skb);
462 
463 	memset(&fl6, 0, sizeof(fl6));
464 	fl6.flowi6_proto = IPPROTO_ICMPV6;
465 	fl6.daddr = hdr->saddr;
466 	if (saddr)
467 		fl6.saddr = *saddr;
468 	fl6.flowi6_oif = iif;
469 	fl6.fl6_icmp_type = type;
470 	fl6.fl6_icmp_code = code;
471 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
472 
473 	sk = icmpv6_xmit_lock(net);
474 	if (sk == NULL)
475 		return;
476 	np = inet6_sk(sk);
477 
478 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
479 		goto out;
480 
481 	tmp_hdr.icmp6_type = type;
482 	tmp_hdr.icmp6_code = code;
483 	tmp_hdr.icmp6_cksum = 0;
484 	tmp_hdr.icmp6_pointer = htonl(info);
485 
486 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
487 		fl6.flowi6_oif = np->mcast_oif;
488 	else if (!fl6.flowi6_oif)
489 		fl6.flowi6_oif = np->ucast_oif;
490 
491 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
492 	if (IS_ERR(dst))
493 		goto out;
494 
495 	if (ipv6_addr_is_multicast(&fl6.daddr))
496 		hlimit = np->mcast_hops;
497 	else
498 		hlimit = np->hop_limit;
499 	if (hlimit < 0)
500 		hlimit = ip6_dst_hoplimit(dst);
501 
502 	msg.skb = skb;
503 	msg.offset = skb_network_offset(skb);
504 	msg.type = type;
505 
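	/*
	 * Quote as much of the offending packet as fits, together with the
	 * new IPv6 and ICMPv6 headers, in the minimum IPv6 MTU (1280 bytes).
	 */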
506 	len = skb->len - msg.offset;
507 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
508 	if (len < 0) {
509 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
510 		goto out_dst_release;
511 	}
512 
513 	rcu_read_lock();
514 	idev = __in6_dev_get(skb->dev);
515 
516 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
517 			      len + sizeof(struct icmp6hdr),
518 			      sizeof(struct icmp6hdr), hlimit,
519 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
520 			      MSG_DONTWAIT, np->dontfrag);
521 	if (err) {
522 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
523 		ip6_flush_pending_frames(sk);
524 	} else {
525 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
526 						 len + sizeof(struct icmp6hdr));
527 	}
528 	rcu_read_unlock();
529 out_dst_release:
530 	dst_release(dst);
531 out:
532 	icmpv6_xmit_unlock(sk);
533 }
534 
535 /* Slightly more convenient version of icmp6_send.
536  */
537 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
538 {
539 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
540 	kfree_skb(skb);
541 }
542 
543 static void icmpv6_echo_reply(struct sk_buff *skb)
544 {
545 	struct net *net = dev_net(skb->dev);
546 	struct sock *sk;
547 	struct inet6_dev *idev;
548 	struct ipv6_pinfo *np;
549 	const struct in6_addr *saddr = NULL;
550 	struct icmp6hdr *icmph = icmp6_hdr(skb);
551 	struct icmp6hdr tmp_hdr;
552 	struct flowi6 fl6;
553 	struct icmpv6_msg msg;
554 	struct dst_entry *dst;
555 	int err = 0;
556 	int hlimit;
557 	u8 tclass;
558 
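	/*
	 * Reply from the address the echo request was sent to, unless that
	 * address was multicast (or anycast without the anycast_src_echo_reply
	 * sysctl); in that case leave saddr unset and let source address
	 * selection pick one.
	 */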
559 	saddr = &ipv6_hdr(skb)->daddr;
560 
561 	if (!ipv6_unicast_destination(skb) &&
562 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
563 	      ipv6_anycast_destination(skb)))
564 		saddr = NULL;
565 
566 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
567 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
568 
569 	memset(&fl6, 0, sizeof(fl6));
570 	fl6.flowi6_proto = IPPROTO_ICMPV6;
571 	fl6.daddr = ipv6_hdr(skb)->saddr;
572 	if (saddr)
573 		fl6.saddr = *saddr;
574 	fl6.flowi6_oif = skb->dev->ifindex;
575 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
576 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
577 
578 	sk = icmpv6_xmit_lock(net);
579 	if (sk == NULL)
580 		return;
581 	np = inet6_sk(sk);
582 
583 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
584 		fl6.flowi6_oif = np->mcast_oif;
585 	else if (!fl6.flowi6_oif)
586 		fl6.flowi6_oif = np->ucast_oif;
587 
588 	err = ip6_dst_lookup(sk, &dst, &fl6);
589 	if (err)
590 		goto out;
591 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
592 	if (IS_ERR(dst))
593 		goto out;
594 
595 	if (ipv6_addr_is_multicast(&fl6.daddr))
596 		hlimit = np->mcast_hops;
597 	else
598 		hlimit = np->hop_limit;
599 	if (hlimit < 0)
600 		hlimit = ip6_dst_hoplimit(dst);
601 
602 	idev = __in6_dev_get(skb->dev);
603 
604 	msg.skb = skb;
605 	msg.offset = 0;
606 	msg.type = ICMPV6_ECHO_REPLY;
607 
608 	tclass = ipv6_get_dsfield(ipv6_hdr(skb));
609 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
610 				sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
611 				(struct rt6_info *)dst, MSG_DONTWAIT,
612 				np->dontfrag);
613 
614 	if (err) {
615 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
616 		ip6_flush_pending_frames(sk);
617 	} else {
618 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
619 						 skb->len + sizeof(struct icmp6hdr));
620 	}
621 	dst_release(dst);
622 out:
623 	icmpv6_xmit_unlock(sk);
624 }
625 
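/*
 * Deliver a received ICMPv6 error to whoever sent the offending packet:
 * skip the quoted extension headers, then hand the error to the upper-layer
 * protocol's err_handler (e.g. TCP, UDP) and to any matching raw sockets.
 */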
626 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
627 {
628 	const struct inet6_protocol *ipprot;
629 	int inner_offset;
630 	__be16 frag_off;
631 	u8 nexthdr;
632 
633 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
634 		return;
635 
636 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
637 	if (ipv6_ext_hdr(nexthdr)) {
638 		/* now skip over extension headers */
639 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
640 						&nexthdr, &frag_off);
641 		if (inner_offset < 0)
642 			return;
643 	} else {
644 		inner_offset = sizeof(struct ipv6hdr);
645 	}
646 
647 	/* Check the header, including 8 bytes of the inner protocol header. */
648 	if (!pskb_may_pull(skb, inner_offset+8))
649 		return;
650 
651 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
652 	   Without this we will not be able, e.g., to do source routed
653 	   pmtu discovery.
654 	   The corresponding argument (opt) to the notifiers is already added.
655 	   --ANK (980726)
656 	 */
657 
658 	rcu_read_lock();
659 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
660 	if (ipprot && ipprot->err_handler)
661 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
662 	rcu_read_unlock();
663 
664 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
665 }
666 
667 /*
668  *	Handle icmp messages
669  */
670 
671 static int icmpv6_rcv(struct sk_buff *skb)
672 {
673 	struct net_device *dev = skb->dev;
674 	struct inet6_dev *idev = __in6_dev_get(dev);
675 	const struct in6_addr *saddr, *daddr;
676 	struct icmp6hdr *hdr;
677 	u8 type;
678 
679 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
680 		struct sec_path *sp = skb_sec_path(skb);
681 		int nh;
682 
683 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
684 				 XFRM_STATE_ICMP))
685 			goto drop_no_count;
686 
687 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
688 			goto drop_no_count;
689 
690 		nh = skb_network_offset(skb);
691 		skb_set_network_header(skb, sizeof(*hdr));
692 
693 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
694 			goto drop_no_count;
695 
696 		skb_set_network_header(skb, nh);
697 	}
698 
699 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
700 
701 	saddr = &ipv6_hdr(skb)->saddr;
702 	daddr = &ipv6_hdr(skb)->daddr;
703 
704 	/* Perform checksum. */
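	/*
	 * CHECKSUM_COMPLETE: the device summed the payload, so folding in the
	 * pseudo-header yields zero for a valid checksum.  Otherwise seed
	 * skb->csum with the pseudo-header and let __skb_checksum_complete()
	 * walk the packet.
	 */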
705 	switch (skb->ip_summed) {
706 	case CHECKSUM_COMPLETE:
707 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
708 				     skb->csum))
709 			break;
710 		/* fall through */
711 	case CHECKSUM_NONE:
712 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
713 					     IPPROTO_ICMPV6, 0));
714 		if (__skb_checksum_complete(skb)) {
715 			LIMIT_NETDEBUG(KERN_DEBUG
716 				       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
717 				       saddr, daddr);
718 			goto csum_error;
719 		}
720 	}
721 
722 	if (!pskb_pull(skb, sizeof(*hdr)))
723 		goto discard_it;
724 
725 	hdr = icmp6_hdr(skb);
726 
727 	type = hdr->icmp6_type;
728 
729 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
730 
731 	switch (type) {
732 	case ICMPV6_ECHO_REQUEST:
733 		icmpv6_echo_reply(skb);
734 		break;
735 
736 	case ICMPV6_ECHO_REPLY:
737 		ping_rcv(skb);
738 		break;
739 
740 	case ICMPV6_PKT_TOOBIG:
741 		/* BUGGG_FUTURE: if the packet contains a rthdr, we cannot update
742 		   the standard destination cache. It seems only an "advanced"
743 		   destination cache would solve this problem.
744 		   --ANK (980726)
745 		 */
746 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
747 			goto discard_it;
748 		hdr = icmp6_hdr(skb);
749 
750 		/*
751 		 *	Drop through to notify
752 		 */
753 
754 	case ICMPV6_DEST_UNREACH:
755 	case ICMPV6_TIME_EXCEED:
756 	case ICMPV6_PARAMPROB:
757 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
758 		break;
759 
760 	case NDISC_ROUTER_SOLICITATION:
761 	case NDISC_ROUTER_ADVERTISEMENT:
762 	case NDISC_NEIGHBOUR_SOLICITATION:
763 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
764 	case NDISC_REDIRECT:
765 		ndisc_rcv(skb);
766 		break;
767 
768 	case ICMPV6_MGM_QUERY:
769 		igmp6_event_query(skb);
770 		break;
771 
772 	case ICMPV6_MGM_REPORT:
773 		igmp6_event_report(skb);
774 		break;
775 
776 	case ICMPV6_MGM_REDUCTION:
777 	case ICMPV6_NI_QUERY:
778 	case ICMPV6_NI_REPLY:
779 	case ICMPV6_MLD2_REPORT:
780 	case ICMPV6_DHAAD_REQUEST:
781 	case ICMPV6_DHAAD_REPLY:
782 	case ICMPV6_MOBILE_PREFIX_SOL:
783 	case ICMPV6_MOBILE_PREFIX_ADV:
784 		break;
785 
786 	default:
787 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
788 
789 		/* informational */
790 		if (type & ICMPV6_INFOMSG_MASK)
791 			break;
792 
793 		/*
794 		 * error of unknown type.
795 		 * must pass to upper level
796 		 */
797 
798 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
799 	}
800 
801 	kfree_skb(skb);
802 	return 0;
803 
804 csum_error:
805 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
806 discard_it:
807 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
808 drop_no_count:
809 	kfree_skb(skb);
810 	return 0;
811 }
812 
813 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
814 		      u8 type,
815 		      const struct in6_addr *saddr,
816 		      const struct in6_addr *daddr,
817 		      int oif)
818 {
819 	memset(fl6, 0, sizeof(*fl6));
820 	fl6->saddr = *saddr;
821 	fl6->daddr = *daddr;
822 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
823 	fl6->fl6_icmp_type	= type;
824 	fl6->fl6_icmp_code	= 0;
825 	fl6->flowi6_oif		= oif;
826 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
827 }
828 
829 /*
830  * Special lock-class for __icmpv6_sk:
831  */
832 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
833 
834 static int __net_init icmpv6_sk_init(struct net *net)
835 {
836 	struct sock *sk;
837 	int err, i, j;
838 
839 	net->ipv6.icmp_sk =
840 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
841 	if (net->ipv6.icmp_sk == NULL)
842 		return -ENOMEM;
843 
844 	for_each_possible_cpu(i) {
845 		err = inet_ctl_sock_create(&sk, PF_INET6,
846 					   SOCK_RAW, IPPROTO_ICMPV6, net);
847 		if (err < 0) {
848 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
849 			       err);
850 			goto fail;
851 		}
852 
853 		net->ipv6.icmp_sk[i] = sk;
854 
855 		/*
856 		 * Split off their lock-class, because sk->sk_dst_lock
857 		 * gets used from softirqs, which is safe for
858 		 * __icmpv6_sk (because those never get directly used
859 		 * via userspace syscalls), but unsafe for normal sockets.
860 		 */
861 		lockdep_set_class(&sk->sk_dst_lock,
862 				  &icmpv6_socket_sk_dst_lock_key);
863 
864 		/* Enough space for 2 64K ICMP packets, including
865 		 * sk_buff struct overhead.
866 		 */
867 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
868 	}
869 	return 0;
870 
871  fail:
872 	for (j = 0; j < i; j++)
873 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
874 	kfree(net->ipv6.icmp_sk);
875 	return err;
876 }
877 
878 static void __net_exit icmpv6_sk_exit(struct net *net)
879 {
880 	int i;
881 
882 	for_each_possible_cpu(i) {
883 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
884 	}
885 	kfree(net->ipv6.icmp_sk);
886 }
887 
888 static struct pernet_operations icmpv6_sk_ops = {
889        .init = icmpv6_sk_init,
890        .exit = icmpv6_sk_exit,
891 };
892 
893 int __init icmpv6_init(void)
894 {
895 	int err;
896 
897 	err = register_pernet_subsys(&icmpv6_sk_ops);
898 	if (err < 0)
899 		return err;
900 
901 	err = -EAGAIN;
902 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
903 		goto fail;
904 
905 	err = inet6_register_icmp_sender(icmp6_send);
906 	if (err)
907 		goto sender_reg_err;
908 	return 0;
909 
910 sender_reg_err:
911 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
912 fail:
913 	pr_err("Failed to register ICMP6 protocol\n");
914 	unregister_pernet_subsys(&icmpv6_sk_ops);
915 	return err;
916 }
917 
918 void icmpv6_cleanup(void)
919 {
920 	inet6_unregister_icmp_sender(icmp6_send);
921 	unregister_pernet_subsys(&icmpv6_sk_ops);
922 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
923 }
924 
925 
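/*
 * Map ICMPV6_DEST_UNREACH codes (0..6, in code order) to errno values.
 * 'fatal' marks hard errors that callers such as TCP treat as fatal for
 * the affected socket.
 */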
926 static const struct icmp6_err {
927 	int err;
928 	int fatal;
929 } tab_unreach[] = {
930 	{	/* NOROUTE */
931 		.err	= ENETUNREACH,
932 		.fatal	= 0,
933 	},
934 	{	/* ADM_PROHIBITED */
935 		.err	= EACCES,
936 		.fatal	= 1,
937 	},
938 	{	/* Was NOT_NEIGHBOUR, now reserved */
939 		.err	= EHOSTUNREACH,
940 		.fatal	= 0,
941 	},
942 	{	/* ADDR_UNREACH	*/
943 		.err	= EHOSTUNREACH,
944 		.fatal	= 0,
945 	},
946 	{	/* PORT_UNREACH	*/
947 		.err	= ECONNREFUSED,
948 		.fatal	= 1,
949 	},
950 	{	/* POLICY_FAIL */
951 		.err	= EACCES,
952 		.fatal	= 1,
953 	},
954 	{	/* REJECT_ROUTE	*/
955 		.err	= EACCES,
956 		.fatal	= 1,
957 	},
958 };
959 
960 int icmpv6_err_convert(u8 type, u8 code, int *err)
961 {
962 	int fatal = 0;
963 
964 	*err = EPROTO;
965 
966 	switch (type) {
967 	case ICMPV6_DEST_UNREACH:
968 		fatal = 1;
969 		if (code < ARRAY_SIZE(tab_unreach)) {
970 			*err  = tab_unreach[code].err;
971 			fatal = tab_unreach[code].fatal;
972 		}
973 		break;
974 
975 	case ICMPV6_PKT_TOOBIG:
976 		*err = EMSGSIZE;
977 		break;
978 
979 	case ICMPV6_PARAMPROB:
980 		*err = EPROTO;
981 		fatal = 1;
982 		break;
983 
984 	case ICMPV6_TIME_EXCEED:
985 		*err = EHOSTUNREACH;
986 		break;
987 	}
988 
989 	return fatal;
990 }
991 EXPORT_SYMBOL(icmpv6_err_convert);
992 
993 #ifdef CONFIG_SYSCTL
994 static struct ctl_table ipv6_icmp_table_template[] = {
995 	{
996 		.procname	= "ratelimit",
997 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
998 		.maxlen		= sizeof(int),
999 		.mode		= 0644,
1000 		.proc_handler	= proc_dointvec_ms_jiffies,
1001 	},
1002 	{ },
1003 };
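/*
 * This is exposed as net.ipv6.icmp.ratelimit; the value is kept in jiffies
 * internally but read and written in milliseconds, so e.g.
 *	sysctl -w net.ipv6.icmp.ratelimit=500
 * allows one error to a given destination every 500 ms.
 */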
1004 
1005 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1006 {
1007 	struct ctl_table *table;
1008 
1009 	table = kmemdup(ipv6_icmp_table_template,
1010 			sizeof(ipv6_icmp_table_template),
1011 			GFP_KERNEL);
1012 
1013 	if (table)
1014 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1015 
1016 	return table;
1017 }
1018 #endif
1019 
1020