xref: /openbmc/linux/net/ipv6/icmp.c (revision 867a0e05)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 
72 #include <asm/uaccess.h>
73 
74 /*
75  *	The ICMP socket(s). This is the most convenient way to flow control
76  *	our ICMP output as well as maintain a clean interface throughout
77  *	all layers. All Socketless IP sends will soon be gone.
78  *
79  *	On SMP we have one ICMP socket per-cpu.
80  */
81 static inline struct sock *icmpv6_sk(struct net *net)
82 {
83 	return net->ipv6.icmp_sk[smp_processor_id()];
84 }
85 
86 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
87 		       u8 type, u8 code, int offset, __be32 info)
88 {
89 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
90 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
91 	struct net *net = dev_net(skb->dev);
92 
93 	if (type == ICMPV6_PKT_TOOBIG)
94 		ip6_update_pmtu(skb, net, info, 0, 0);
95 	else if (type == NDISC_REDIRECT)
96 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
97 
98 	if (!(type & ICMPV6_INFOMSG_MASK))
99 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
100 			ping_err(skb, offset, info);
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
112 {
113 	struct sock *sk;
114 
115 	local_bh_disable();
116 
117 	sk = icmpv6_sk(net);
118 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
119 		/* This can happen if the output path (f.e. SIT or
120 		 * ip6ip6 tunnel) signals dst_link_failure() for an
121 		 * outgoing ICMP6 packet.
122 		 */
123 		local_bh_enable();
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
129 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
130 {
131 	spin_unlock_bh(&sk->sk_lock.slock);
132 }
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 	int len = skb->len - ptr;
149 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 	__be16 frag_off;
151 
152 	if (len < 0)
153 		return true;
154 
155 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 	if (ptr < 0)
157 		return false;
158 	if (nexthdr == IPPROTO_ICMPV6) {
159 		u8 _type, *tp;
160 		tp = skb_header_pointer(skb,
161 			ptr+offsetof(struct icmp6hdr, icmp6_type),
162 			sizeof(_type), &_type);
163 		if (tp == NULL ||
164 		    !(*tp & ICMPV6_INFOMSG_MASK))
165 			return true;
166 	}
167 	return false;
168 }
169 
170 /*
171  * Check the ICMP output rate limit
172  */
173 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
174 			       struct flowi6 *fl6)
175 {
176 	struct net *net = sock_net(sk);
177 	struct dst_entry *dst;
178 	bool res = false;
179 
180 	/* Informational messages are not limited. */
181 	if (type & ICMPV6_INFOMSG_MASK)
182 		return true;
183 
184 	/* Do not limit pmtu discovery, it would break it. */
185 	if (type == ICMPV6_PKT_TOOBIG)
186 		return true;
187 
188 	/*
189 	 * Look up the output route.
190 	 * XXX: perhaps the expire for routing entries cloned by
191 	 * this lookup should be more aggressive (not longer than timeout).
192 	 */
193 	dst = ip6_route_output(net, sk, fl6);
194 	if (dst->error) {
195 		IP6_INC_STATS(net, ip6_dst_idev(dst),
196 			      IPSTATS_MIB_OUTNOROUTES);
197 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
198 		res = true;
199 	} else {
200 		struct rt6_info *rt = (struct rt6_info *)dst;
201 		int tmo = net->ipv6.sysctl.icmpv6_time;
202 
203 		/* Give more bandwidth to wider prefixes. */
204 		if (rt->rt6i_dst.plen < 128)
205 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
206 
207 		if (icmp_global_allow()) {
208 			struct inet_peer *peer;
209 
210 			peer = inet_getpeer_v6(net->ipv6.peers,
211 					       &rt->rt6i_dst.addr, 1);
212 			res = inet_peer_xrlim_allow(peer, tmo);
213 			if (peer)
214 				inet_putpeer(peer);
215 		}
216 	}
217 	dst_release(dst);
218 	return res;
219 }
220 
221 /*
222  *	an inline helper for the "simple" if statement below
223  *	checks if parameter problem report is caused by an
224  *	unrecognized IPv6 option that has the Option Type
225  *	highest-order two bits set to 10
226  */
227 
228 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
229 {
230 	u8 _optval, *op;
231 
232 	offset += skb_network_offset(skb);
233 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
234 	if (op == NULL)
235 		return true;
236 	return (*op & 0xC0) == 0x80;
237 }
238 
239 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
240 			       struct icmp6hdr *thdr, int len)
241 {
242 	struct sk_buff *skb;
243 	struct icmp6hdr *icmp6h;
244 	int err = 0;
245 
246 	skb = skb_peek(&sk->sk_write_queue);
247 	if (skb == NULL)
248 		goto out;
249 
250 	icmp6h = icmp6_hdr(skb);
251 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
252 	icmp6h->icmp6_cksum = 0;
253 
254 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
255 		skb->csum = csum_partial(icmp6h,
256 					sizeof(struct icmp6hdr), skb->csum);
257 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
258 						      &fl6->daddr,
259 						      len, fl6->flowi6_proto,
260 						      skb->csum);
261 	} else {
262 		__wsum tmp_csum = 0;
263 
264 		skb_queue_walk(&sk->sk_write_queue, skb) {
265 			tmp_csum = csum_add(tmp_csum, skb->csum);
266 		}
267 
268 		tmp_csum = csum_partial(icmp6h,
269 					sizeof(struct icmp6hdr), tmp_csum);
270 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
271 						      &fl6->daddr,
272 						      len, fl6->flowi6_proto,
273 						      tmp_csum);
274 	}
275 	ip6_push_pending_frames(sk);
276 out:
277 	return err;
278 }
279 
280 struct icmpv6_msg {
281 	struct sk_buff	*skb;
282 	int		offset;
283 	uint8_t		type;
284 };
285 
286 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
287 {
288 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
289 	struct sk_buff *org_skb = msg->skb;
290 	__wsum csum = 0;
291 
292 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
293 				      to, len, csum);
294 	skb->csum = csum_block_add(skb->csum, csum, odd);
295 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
296 		nf_ct_attach(skb, org_skb);
297 	return 0;
298 }
299 
300 #if IS_ENABLED(CONFIG_IPV6_MIP6)
301 static void mip6_addr_swap(struct sk_buff *skb)
302 {
303 	struct ipv6hdr *iph = ipv6_hdr(skb);
304 	struct inet6_skb_parm *opt = IP6CB(skb);
305 	struct ipv6_destopt_hao *hao;
306 	struct in6_addr tmp;
307 	int off;
308 
309 	if (opt->dsthao) {
310 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
311 		if (likely(off >= 0)) {
312 			hao = (struct ipv6_destopt_hao *)
313 					(skb_network_header(skb) + off);
314 			tmp = iph->saddr;
315 			iph->saddr = hao->addr;
316 			hao->addr = tmp;
317 		}
318 	}
319 }
320 #else
321 static inline void mip6_addr_swap(struct sk_buff *skb) {}
322 #endif
323 
324 static struct dst_entry *icmpv6_route_lookup(struct net *net,
325 					     struct sk_buff *skb,
326 					     struct sock *sk,
327 					     struct flowi6 *fl6)
328 {
329 	struct dst_entry *dst, *dst2;
330 	struct flowi6 fl2;
331 	int err;
332 
333 	err = ip6_dst_lookup(sk, &dst, fl6);
334 	if (err)
335 		return ERR_PTR(err);
336 
337 	/*
338 	 * We won't send icmp if the destination is known
339 	 * anycast.
340 	 */
341 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
342 		net_dbg_ratelimited("icmp6_send: acast source\n");
343 		dst_release(dst);
344 		return ERR_PTR(-EINVAL);
345 	}
346 
347 	/* No need to clone since we're just using its address. */
348 	dst2 = dst;
349 
350 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
351 	if (!IS_ERR(dst)) {
352 		if (dst != dst2)
353 			return dst;
354 	} else {
355 		if (PTR_ERR(dst) == -EPERM)
356 			dst = NULL;
357 		else
358 			return dst;
359 	}
360 
361 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
362 	if (err)
363 		goto relookup_failed;
364 
365 	err = ip6_dst_lookup(sk, &dst2, &fl2);
366 	if (err)
367 		goto relookup_failed;
368 
369 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
370 	if (!IS_ERR(dst2)) {
371 		dst_release(dst);
372 		dst = dst2;
373 	} else {
374 		err = PTR_ERR(dst2);
375 		if (err == -EPERM) {
376 			dst_release(dst);
377 			return dst2;
378 		} else
379 			goto relookup_failed;
380 	}
381 
382 relookup_failed:
383 	if (dst)
384 		return dst;
385 	return ERR_PTR(err);
386 }
387 
388 /*
389  *	Send an ICMP message in response to a packet in error
390  */
391 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
392 {
393 	struct net *net = dev_net(skb->dev);
394 	struct inet6_dev *idev = NULL;
395 	struct ipv6hdr *hdr = ipv6_hdr(skb);
396 	struct sock *sk;
397 	struct ipv6_pinfo *np;
398 	const struct in6_addr *saddr = NULL;
399 	struct dst_entry *dst;
400 	struct icmp6hdr tmp_hdr;
401 	struct flowi6 fl6;
402 	struct icmpv6_msg msg;
403 	int iif = 0;
404 	int addr_type = 0;
405 	int len;
406 	int hlimit;
407 	int err = 0;
408 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
409 
410 	if ((u8 *)hdr < skb->head ||
411 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
412 		return;
413 
414 	/*
415 	 *	Make sure we respect the rules
416 	 *	i.e. RFC 1885 2.4(e)
417 	 *	Rule (e.1) is enforced by not using icmp6_send
418 	 *	in any code that processes icmp errors.
419 	 */
420 	addr_type = ipv6_addr_type(&hdr->daddr);
421 
422 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
423 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
424 		saddr = &hdr->daddr;
425 
426 	/*
427 	 *	Dest addr check
428 	 */
429 
430 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
431 		if (type != ICMPV6_PKT_TOOBIG &&
432 		    !(type == ICMPV6_PARAMPROB &&
433 		      code == ICMPV6_UNK_OPTION &&
434 		      (opt_unrec(skb, info))))
435 			return;
436 
437 		saddr = NULL;
438 	}
439 
440 	addr_type = ipv6_addr_type(&hdr->saddr);
441 
442 	/*
443 	 *	Source addr check
444 	 */
445 
446 	if (__ipv6_addr_needs_scope_id(addr_type))
447 		iif = skb->dev->ifindex;
448 
449 	/*
450 	 *	Must not send error if the source does not uniquely
451 	 *	identify a single node (RFC2463 Section 2.4).
452 	 *	We check unspecified / multicast addresses here,
453 	 *	and anycast addresses will be checked later.
454 	 */
455 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
456 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n");
457 		return;
458 	}
459 
460 	/*
461 	 *	Never answer to a ICMP packet.
462 	 */
463 	if (is_ineligible(skb)) {
464 		net_dbg_ratelimited("icmp6_send: no reply to icmp error\n");
465 		return;
466 	}
467 
468 	mip6_addr_swap(skb);
469 
470 	memset(&fl6, 0, sizeof(fl6));
471 	fl6.flowi6_proto = IPPROTO_ICMPV6;
472 	fl6.daddr = hdr->saddr;
473 	if (saddr)
474 		fl6.saddr = *saddr;
475 	fl6.flowi6_mark = mark;
476 	fl6.flowi6_oif = iif;
477 	fl6.fl6_icmp_type = type;
478 	fl6.fl6_icmp_code = code;
479 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
480 
481 	sk = icmpv6_xmit_lock(net);
482 	if (sk == NULL)
483 		return;
484 	sk->sk_mark = mark;
485 	np = inet6_sk(sk);
486 
487 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
488 		goto out;
489 
490 	tmp_hdr.icmp6_type = type;
491 	tmp_hdr.icmp6_code = code;
492 	tmp_hdr.icmp6_cksum = 0;
493 	tmp_hdr.icmp6_pointer = htonl(info);
494 
495 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
496 		fl6.flowi6_oif = np->mcast_oif;
497 	else if (!fl6.flowi6_oif)
498 		fl6.flowi6_oif = np->ucast_oif;
499 
500 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
501 	if (IS_ERR(dst))
502 		goto out;
503 
504 	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
505 
506 	msg.skb = skb;
507 	msg.offset = skb_network_offset(skb);
508 	msg.type = type;
509 
510 	len = skb->len - msg.offset;
511 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
512 	if (len < 0) {
513 		net_dbg_ratelimited("icmp: len problem\n");
514 		goto out_dst_release;
515 	}
516 
517 	rcu_read_lock();
518 	idev = __in6_dev_get(skb->dev);
519 
520 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
521 			      len + sizeof(struct icmp6hdr),
522 			      sizeof(struct icmp6hdr), hlimit,
523 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
524 			      MSG_DONTWAIT, np->dontfrag);
525 	if (err) {
526 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
527 		ip6_flush_pending_frames(sk);
528 	} else {
529 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
530 						 len + sizeof(struct icmp6hdr));
531 	}
532 	rcu_read_unlock();
533 out_dst_release:
534 	dst_release(dst);
535 out:
536 	icmpv6_xmit_unlock(sk);
537 }
538 
539 /* Slightly more convenient version of icmp6_send.
540  */
541 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
542 {
543 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
544 	kfree_skb(skb);
545 }
546 
547 static void icmpv6_echo_reply(struct sk_buff *skb)
548 {
549 	struct net *net = dev_net(skb->dev);
550 	struct sock *sk;
551 	struct inet6_dev *idev;
552 	struct ipv6_pinfo *np;
553 	const struct in6_addr *saddr = NULL;
554 	struct icmp6hdr *icmph = icmp6_hdr(skb);
555 	struct icmp6hdr tmp_hdr;
556 	struct flowi6 fl6;
557 	struct icmpv6_msg msg;
558 	struct dst_entry *dst;
559 	int err = 0;
560 	int hlimit;
561 	u8 tclass;
562 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
563 
564 	saddr = &ipv6_hdr(skb)->daddr;
565 
566 	if (!ipv6_unicast_destination(skb) &&
567 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
568 	      ipv6_anycast_destination(skb)))
569 		saddr = NULL;
570 
571 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
572 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
573 
574 	memset(&fl6, 0, sizeof(fl6));
575 	fl6.flowi6_proto = IPPROTO_ICMPV6;
576 	fl6.daddr = ipv6_hdr(skb)->saddr;
577 	if (saddr)
578 		fl6.saddr = *saddr;
579 	fl6.flowi6_oif = skb->dev->ifindex;
580 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
581 	fl6.flowi6_mark = mark;
582 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
583 
584 	sk = icmpv6_xmit_lock(net);
585 	if (sk == NULL)
586 		return;
587 	sk->sk_mark = mark;
588 	np = inet6_sk(sk);
589 
590 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
591 		fl6.flowi6_oif = np->mcast_oif;
592 	else if (!fl6.flowi6_oif)
593 		fl6.flowi6_oif = np->ucast_oif;
594 
595 	err = ip6_dst_lookup(sk, &dst, &fl6);
596 	if (err)
597 		goto out;
598 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
599 	if (IS_ERR(dst))
600 		goto out;
601 
602 	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
603 
604 	idev = __in6_dev_get(skb->dev);
605 
606 	msg.skb = skb;
607 	msg.offset = 0;
608 	msg.type = ICMPV6_ECHO_REPLY;
609 
610 	tclass = ipv6_get_dsfield(ipv6_hdr(skb));
611 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
612 				sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
613 				(struct rt6_info *)dst, MSG_DONTWAIT,
614 				np->dontfrag);
615 
616 	if (err) {
617 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
618 		ip6_flush_pending_frames(sk);
619 	} else {
620 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
621 						 skb->len + sizeof(struct icmp6hdr));
622 	}
623 	dst_release(dst);
624 out:
625 	icmpv6_xmit_unlock(sk);
626 }
627 
628 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
629 {
630 	const struct inet6_protocol *ipprot;
631 	int inner_offset;
632 	__be16 frag_off;
633 	u8 nexthdr;
634 	struct net *net = dev_net(skb->dev);
635 
636 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
637 		goto out;
638 
639 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
640 	if (ipv6_ext_hdr(nexthdr)) {
641 		/* now skip over extension headers */
642 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
643 						&nexthdr, &frag_off);
644 		if (inner_offset < 0)
645 			goto out;
646 	} else {
647 		inner_offset = sizeof(struct ipv6hdr);
648 	}
649 
650 	/* Checkin header including 8 bytes of inner protocol header. */
651 	if (!pskb_may_pull(skb, inner_offset+8))
652 		goto out;
653 
654 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
655 	   Without this we will not able f.e. to make source routed
656 	   pmtu discovery.
657 	   Corresponding argument (opt) to notifiers is already added.
658 	   --ANK (980726)
659 	 */
660 
661 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
662 	if (ipprot && ipprot->err_handler)
663 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
664 
665 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
666 	return;
667 
668 out:
669 	ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
670 }
671 
672 /*
673  *	Handle icmp messages
674  */
675 
676 static int icmpv6_rcv(struct sk_buff *skb)
677 {
678 	struct net_device *dev = skb->dev;
679 	struct inet6_dev *idev = __in6_dev_get(dev);
680 	const struct in6_addr *saddr, *daddr;
681 	struct icmp6hdr *hdr;
682 	u8 type;
683 	bool success = false;
684 
685 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
686 		struct sec_path *sp = skb_sec_path(skb);
687 		int nh;
688 
689 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
690 				 XFRM_STATE_ICMP))
691 			goto drop_no_count;
692 
693 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
694 			goto drop_no_count;
695 
696 		nh = skb_network_offset(skb);
697 		skb_set_network_header(skb, sizeof(*hdr));
698 
699 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
700 			goto drop_no_count;
701 
702 		skb_set_network_header(skb, nh);
703 	}
704 
705 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
706 
707 	saddr = &ipv6_hdr(skb)->saddr;
708 	daddr = &ipv6_hdr(skb)->daddr;
709 
710 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
711 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
712 				    saddr, daddr);
713 		goto csum_error;
714 	}
715 
716 	if (!pskb_pull(skb, sizeof(*hdr)))
717 		goto discard_it;
718 
719 	hdr = icmp6_hdr(skb);
720 
721 	type = hdr->icmp6_type;
722 
723 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
724 
725 	switch (type) {
726 	case ICMPV6_ECHO_REQUEST:
727 		icmpv6_echo_reply(skb);
728 		break;
729 
730 	case ICMPV6_ECHO_REPLY:
731 		success = ping_rcv(skb);
732 		break;
733 
734 	case ICMPV6_PKT_TOOBIG:
735 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
736 		   standard destination cache. Seems, only "advanced"
737 		   destination cache will allow to solve this problem
738 		   --ANK (980726)
739 		 */
740 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
741 			goto discard_it;
742 		hdr = icmp6_hdr(skb);
743 
744 		/*
745 		 *	Drop through to notify
746 		 */
747 
748 	case ICMPV6_DEST_UNREACH:
749 	case ICMPV6_TIME_EXCEED:
750 	case ICMPV6_PARAMPROB:
751 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
752 		break;
753 
754 	case NDISC_ROUTER_SOLICITATION:
755 	case NDISC_ROUTER_ADVERTISEMENT:
756 	case NDISC_NEIGHBOUR_SOLICITATION:
757 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
758 	case NDISC_REDIRECT:
759 		ndisc_rcv(skb);
760 		break;
761 
762 	case ICMPV6_MGM_QUERY:
763 		igmp6_event_query(skb);
764 		break;
765 
766 	case ICMPV6_MGM_REPORT:
767 		igmp6_event_report(skb);
768 		break;
769 
770 	case ICMPV6_MGM_REDUCTION:
771 	case ICMPV6_NI_QUERY:
772 	case ICMPV6_NI_REPLY:
773 	case ICMPV6_MLD2_REPORT:
774 	case ICMPV6_DHAAD_REQUEST:
775 	case ICMPV6_DHAAD_REPLY:
776 	case ICMPV6_MOBILE_PREFIX_SOL:
777 	case ICMPV6_MOBILE_PREFIX_ADV:
778 		break;
779 
780 	default:
781 		/* informational */
782 		if (type & ICMPV6_INFOMSG_MASK)
783 			break;
784 
785 		net_dbg_ratelimited("icmpv6: msg of unknown type\n");
786 
787 		/*
788 		 * error of unknown type.
789 		 * must pass to upper level
790 		 */
791 
792 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
793 	}
794 
795 	/* until the v6 path can be better sorted assume failure and
796 	 * preserve the status quo behaviour for the rest of the paths to here
797 	 */
798 	if (success)
799 		consume_skb(skb);
800 	else
801 		kfree_skb(skb);
802 
803 	return 0;
804 
805 csum_error:
806 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
807 discard_it:
808 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
809 drop_no_count:
810 	kfree_skb(skb);
811 	return 0;
812 }
813 
814 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
815 		      u8 type,
816 		      const struct in6_addr *saddr,
817 		      const struct in6_addr *daddr,
818 		      int oif)
819 {
820 	memset(fl6, 0, sizeof(*fl6));
821 	fl6->saddr = *saddr;
822 	fl6->daddr = *daddr;
823 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
824 	fl6->fl6_icmp_type	= type;
825 	fl6->fl6_icmp_code	= 0;
826 	fl6->flowi6_oif		= oif;
827 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
828 }
829 
830 /*
831  * Special lock-class for __icmpv6_sk:
832  */
833 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
834 
835 static int __net_init icmpv6_sk_init(struct net *net)
836 {
837 	struct sock *sk;
838 	int err, i, j;
839 
840 	net->ipv6.icmp_sk =
841 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
842 	if (net->ipv6.icmp_sk == NULL)
843 		return -ENOMEM;
844 
845 	for_each_possible_cpu(i) {
846 		err = inet_ctl_sock_create(&sk, PF_INET6,
847 					   SOCK_RAW, IPPROTO_ICMPV6, net);
848 		if (err < 0) {
849 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
850 			       err);
851 			goto fail;
852 		}
853 
854 		net->ipv6.icmp_sk[i] = sk;
855 
856 		/*
857 		 * Split off their lock-class, because sk->sk_dst_lock
858 		 * gets used from softirqs, which is safe for
859 		 * __icmpv6_sk (because those never get directly used
860 		 * via userspace syscalls), but unsafe for normal sockets.
861 		 */
862 		lockdep_set_class(&sk->sk_dst_lock,
863 				  &icmpv6_socket_sk_dst_lock_key);
864 
865 		/* Enough space for 2 64K ICMP packets, including
866 		 * sk_buff struct overhead.
867 		 */
868 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
869 	}
870 	return 0;
871 
872  fail:
873 	for (j = 0; j < i; j++)
874 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
875 	kfree(net->ipv6.icmp_sk);
876 	return err;
877 }
878 
879 static void __net_exit icmpv6_sk_exit(struct net *net)
880 {
881 	int i;
882 
883 	for_each_possible_cpu(i) {
884 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
885 	}
886 	kfree(net->ipv6.icmp_sk);
887 }
888 
889 static struct pernet_operations icmpv6_sk_ops = {
890 	.init = icmpv6_sk_init,
891 	.exit = icmpv6_sk_exit,
892 };
893 
894 int __init icmpv6_init(void)
895 {
896 	int err;
897 
898 	err = register_pernet_subsys(&icmpv6_sk_ops);
899 	if (err < 0)
900 		return err;
901 
902 	err = -EAGAIN;
903 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
904 		goto fail;
905 
906 	err = inet6_register_icmp_sender(icmp6_send);
907 	if (err)
908 		goto sender_reg_err;
909 	return 0;
910 
911 sender_reg_err:
912 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
913 fail:
914 	pr_err("Failed to register ICMP6 protocol\n");
915 	unregister_pernet_subsys(&icmpv6_sk_ops);
916 	return err;
917 }
918 
919 void icmpv6_cleanup(void)
920 {
921 	inet6_unregister_icmp_sender(icmp6_send);
922 	unregister_pernet_subsys(&icmpv6_sk_ops);
923 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
924 }
925 
926 
/*
 * Translation of Destination Unreachable codes, indexed by the ICMPv6
 * code: the errno to report and whether the error is fatal. Used by
 * icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
960 
961 int icmpv6_err_convert(u8 type, u8 code, int *err)
962 {
963 	int fatal = 0;
964 
965 	*err = EPROTO;
966 
967 	switch (type) {
968 	case ICMPV6_DEST_UNREACH:
969 		fatal = 1;
970 		if (code < ARRAY_SIZE(tab_unreach)) {
971 			*err  = tab_unreach[code].err;
972 			fatal = tab_unreach[code].fatal;
973 		}
974 		break;
975 
976 	case ICMPV6_PKT_TOOBIG:
977 		*err = EMSGSIZE;
978 		break;
979 
980 	case ICMPV6_PARAMPROB:
981 		*err = EPROTO;
982 		fatal = 1;
983 		break;
984 
985 	case ICMPV6_TIME_EXCEED:
986 		*err = EHOSTUNREACH;
987 		break;
988 	}
989 
990 	return fatal;
991 }
992 EXPORT_SYMBOL(icmpv6_err_convert);
993 
994 #ifdef CONFIG_SYSCTL
995 static struct ctl_table ipv6_icmp_table_template[] = {
996 	{
997 		.procname	= "ratelimit",
998 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
999 		.maxlen		= sizeof(int),
1000 		.mode		= 0644,
1001 		.proc_handler	= proc_dointvec_ms_jiffies,
1002 	},
1003 	{ },
1004 };
1005 
1006 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1007 {
1008 	struct ctl_table *table;
1009 
1010 	table = kmemdup(ipv6_icmp_table_template,
1011 			sizeof(ipv6_icmp_table_template),
1012 			GFP_KERNEL);
1013 
1014 	if (table)
1015 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1016 
1017 	return table;
1018 }
1019 #endif
1020