xref: /openbmc/linux/net/ipv6/icmp.c (revision cc71b7b0)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to send parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
/* Error handler registered for IPPROTO_ICMPV6: invoked when an incoming
 * ICMPv6 error/redirect quotes a packet whose payload was itself ICMPv6.
 * @offset: offset of the inner ICMPv6 header within skb->data.
 */
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* If the error is about one of our echo requests, tell ping. */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));
}
104 
static int icmpv6_rcv(struct sk_buff *skb);

/* Registration record for IPPROTO_ICMPV6 in the inet6 protocol table. */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
112 
113 /* Called with BH disabled */
114 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
115 {
116 	struct sock *sk;
117 
118 	sk = icmpv6_sk(net);
119 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 		/* This can happen if the output path (f.e. SIT or
121 		 * ip6ip6 tunnel) signals dst_link_failure() for an
122 		 * outgoing ICMP6 packet.
123 		 */
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
/* Release the per-CPU ICMPv6 socket taken by icmpv6_xmit_lock(). */
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock(&sk->sk_lock.slock);
}
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
static bool is_ineligible(const struct sk_buff *skb)
{
	/* Offset of the first byte past the fixed IPv6 header. */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	/* Truncated inside the IPv6 header itself: cannot classify. */
	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		/* Unreadable type, or an ICMPv6 *error*: no reply allowed. */
		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}
168 
169 static bool icmpv6_mask_allow(int type)
170 {
171 	/* Informational messages are not limited. */
172 	if (type & ICMPV6_INFOMSG_MASK)
173 		return true;
174 
175 	/* Do not limit pmtu discovery, it would break it. */
176 	if (type == ICMPV6_PKT_TOOBIG)
177 		return true;
178 
179 	return false;
180 }
181 
182 static bool icmpv6_global_allow(int type)
183 {
184 	if (icmpv6_mask_allow(type))
185 		return true;
186 
187 	if (icmp_global_allow())
188 		return true;
189 
190 	return false;
191 }
192 
/*
 * Check the ICMP output rate limit.
 * Returns true when a message of @type may be sent along @fl6.
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	/* Messages exempt from rate limiting always pass. */
	if (icmpv6_mask_allow(type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* Per-destination token bucket keyed on the inet_peer. */
		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	dst_release(dst);
	return res;
}
234 
235 /*
236  *	an inline helper for the "simple" if statement below
237  *	checks if parameter problem report is caused by an
238  *	unrecognized IPv6 option that has the Option Type
239  *	highest-order two bits set to 10
240  */
241 
242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
243 {
244 	u8 _optval, *op;
245 
246 	offset += skb_network_offset(skb);
247 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
248 	if (!op)
249 		return true;
250 	return (*op & 0xC0) == 0x80;
251 }
252 
/* Finalize and transmit queued ICMPv6 data: copy the prepared header
 * @thdr into the first queued skb, fill in the ICMPv6 checksum over
 * @len payload bytes, and push the pending frames out.
 * Always returns 0.
 */
int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
			       struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		goto out;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single skb: fold the header into its running csum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: sum each skb's partial csum first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
out:
	return 0;
}
292 
/* Context handed to icmpv6_getfrag() while building an ICMPv6 message. */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet whose payload is being quoted */
	int		offset;	/* where in skb to start copying from */
	uint8_t		type;	/* outgoing ICMPv6 type */
};
298 
/* ip6_append_data() getfrag callback: copy @len bytes of the quoted
 * packet into the output buffer while accumulating the checksum.
 * Always returns 0.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum = 0;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	/* Tie error replies to the conntrack entry of the original skb. */
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
312 
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: if the packet carried a Home Address destination option,
 * swap the source address with the home address so the ICMPv6 error is
 * built against the home address rather than the care-of address.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
336 
/* Resolve the output route for an outgoing ICMPv6 error, applying XFRM
 * policy.  If the forward policy lookup returns -EPERM, retry with the
 * reverse-decoded flow of @skb (ICMP may be permitted back along an SA).
 * Returns a held dst_entry or an ERR_PTR().
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		/* -EPERM: fall through to the reverse-flow retry below. */
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	/* Decode the reverse session of @skb into fl2 and retry. */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Return whichever dst we still hold, else propagate the error. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
400 
401 static int icmp6_iif(const struct sk_buff *skb)
402 {
403 	int iif = skb->dev->ifindex;
404 
405 	/* for local traffic to local address, skb dev is the loopback
406 	 * device. Check if there is a dst attached to the skb and if so
407 	 * get the real device index.
408 	 */
409 	if (unlikely(iif == LOOPBACK_IFINDEX)) {
410 		const struct rt6_info *rt6 = skb_rt6_info(skb);
411 
412 		if (rt6)
413 			iif = rt6->rt6i_idev->dev->ifindex;
414 	}
415 
416 	return iif;
417 }
418 
419 /*
420  *	Send an ICMP message in response to a packet in error
421  */
422 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
423 		       const struct in6_addr *force_saddr)
424 {
425 	struct net *net = dev_net(skb->dev);
426 	struct inet6_dev *idev = NULL;
427 	struct ipv6hdr *hdr = ipv6_hdr(skb);
428 	struct sock *sk;
429 	struct ipv6_pinfo *np;
430 	const struct in6_addr *saddr = NULL;
431 	struct dst_entry *dst;
432 	struct icmp6hdr tmp_hdr;
433 	struct flowi6 fl6;
434 	struct icmpv6_msg msg;
435 	struct sockcm_cookie sockc_unused = {0};
436 	struct ipcm6_cookie ipc6;
437 	int iif = 0;
438 	int addr_type = 0;
439 	int len;
440 	int err = 0;
441 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
442 
443 	if ((u8 *)hdr < skb->head ||
444 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
445 		return;
446 
447 	/*
448 	 *	Make sure we respect the rules
449 	 *	i.e. RFC 1885 2.4(e)
450 	 *	Rule (e.1) is enforced by not using icmp6_send
451 	 *	in any code that processes icmp errors.
452 	 */
453 	addr_type = ipv6_addr_type(&hdr->daddr);
454 
455 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
456 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
457 		saddr = &hdr->daddr;
458 
459 	/*
460 	 *	Dest addr check
461 	 */
462 
463 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
464 		if (type != ICMPV6_PKT_TOOBIG &&
465 		    !(type == ICMPV6_PARAMPROB &&
466 		      code == ICMPV6_UNK_OPTION &&
467 		      (opt_unrec(skb, info))))
468 			return;
469 
470 		saddr = NULL;
471 	}
472 
473 	addr_type = ipv6_addr_type(&hdr->saddr);
474 
475 	/*
476 	 *	Source addr check
477 	 */
478 
479 	if (__ipv6_addr_needs_scope_id(addr_type)) {
480 		iif = icmp6_iif(skb);
481 	} else {
482 		dst = skb_dst(skb);
483 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
484 	}
485 
486 	/*
487 	 *	Must not send error if the source does not uniquely
488 	 *	identify a single node (RFC2463 Section 2.4).
489 	 *	We check unspecified / multicast addresses here,
490 	 *	and anycast addresses will be checked later.
491 	 */
492 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
493 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
494 				    &hdr->saddr, &hdr->daddr);
495 		return;
496 	}
497 
498 	/*
499 	 *	Never answer to a ICMP packet.
500 	 */
501 	if (is_ineligible(skb)) {
502 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
503 				    &hdr->saddr, &hdr->daddr);
504 		return;
505 	}
506 
507 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
508 	local_bh_disable();
509 
510 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
511 	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
512 		goto out_bh_enable;
513 
514 	mip6_addr_swap(skb);
515 
516 	memset(&fl6, 0, sizeof(fl6));
517 	fl6.flowi6_proto = IPPROTO_ICMPV6;
518 	fl6.daddr = hdr->saddr;
519 	if (force_saddr)
520 		saddr = force_saddr;
521 	if (saddr)
522 		fl6.saddr = *saddr;
523 	fl6.flowi6_mark = mark;
524 	fl6.flowi6_oif = iif;
525 	fl6.fl6_icmp_type = type;
526 	fl6.fl6_icmp_code = code;
527 	fl6.flowi6_uid = sock_net_uid(net, NULL);
528 	fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
529 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
530 
531 	sk = icmpv6_xmit_lock(net);
532 	if (!sk)
533 		goto out_bh_enable;
534 
535 	sk->sk_mark = mark;
536 	np = inet6_sk(sk);
537 
538 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
539 		goto out;
540 
541 	tmp_hdr.icmp6_type = type;
542 	tmp_hdr.icmp6_code = code;
543 	tmp_hdr.icmp6_cksum = 0;
544 	tmp_hdr.icmp6_pointer = htonl(info);
545 
546 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
547 		fl6.flowi6_oif = np->mcast_oif;
548 	else if (!fl6.flowi6_oif)
549 		fl6.flowi6_oif = np->ucast_oif;
550 
551 	ipc6.tclass = np->tclass;
552 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
553 
554 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
555 	if (IS_ERR(dst))
556 		goto out;
557 
558 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
559 	ipc6.dontfrag = np->dontfrag;
560 	ipc6.opt = NULL;
561 
562 	msg.skb = skb;
563 	msg.offset = skb_network_offset(skb);
564 	msg.type = type;
565 
566 	len = skb->len - msg.offset;
567 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
568 	if (len < 0) {
569 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
570 				    &hdr->saddr, &hdr->daddr);
571 		goto out_dst_release;
572 	}
573 
574 	rcu_read_lock();
575 	idev = __in6_dev_get(skb->dev);
576 
577 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
578 			      len + sizeof(struct icmp6hdr),
579 			      sizeof(struct icmp6hdr),
580 			      &ipc6, &fl6, (struct rt6_info *)dst,
581 			      MSG_DONTWAIT, &sockc_unused);
582 	if (err) {
583 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
584 		ip6_flush_pending_frames(sk);
585 	} else {
586 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
587 						 len + sizeof(struct icmp6hdr));
588 	}
589 	rcu_read_unlock();
590 out_dst_release:
591 	dst_release(dst);
592 out:
593 	icmpv6_xmit_unlock(sk);
594 out_bh_enable:
595 	local_bh_enable();
596 }
597 
/* Slightly more convenient version of icmp6_send.
 * Sends a Parameter Problem pointing at offset @pos and consumes @skb.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}
605 
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 * Returns 0 when an ICMPv6 message was generated, 1 when the packet was
 * too short or no copy could be allocated.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	/* Need the tunnel header plus the inner IPv6 header + 8 bytes. */
	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* Full copy (not clone) when the payload is rewritten below. */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Source the error from the v4-mapped outer IPv4 source address. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr);
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr);
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
670 
/* Reply to an ICMPv6 Echo Request carried in @skb. */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	int err = 0;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	struct sockcm_cookie sockc_unused = {0};

	saddr = &ipv6_hdr(skb)->daddr;

	/* Only echo from the request's destination when it is our
	 * unicast (or, when enabled by sysctl, anycast) address;
	 * otherwise let routing pick the source.
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
	      ipv6_anycast_destination(skb_dst(skb), saddr)))
		saddr = NULL;

	/* Reuse the request's header, flipping only the type. */
	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	sk->sk_mark = mark;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	err = ip6_dst_lookup(net, sk, &dst, &fl6);
	if (err)
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = ICMPV6_ECHO_REPLY;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	/* Mirror the request's traffic class in the reply. */
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.dontfrag = np->dontfrag;
	ipc6.opt = NULL;

	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
				sizeof(struct icmp6hdr), &ipc6, &fl6,
				(struct rt6_info *)dst, MSG_DONTWAIT,
				&sockc_unused);

	if (err) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 skb->len + sizeof(struct icmp6hdr));
	}
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
757 
/* Deliver an incoming ICMPv6 error to the upper-layer protocol and to
 * raw sockets bound to the embedded packet's next header.
 * @skb->data points at the packet quoted inside the ICMPv6 error.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check in header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
801 
802 /*
803  *	Handle icmp messages
804  */
805 
/* Main receive handler for IPPROTO_ICMPV6.  Always returns 0; @skb is
 * consumed on every path.
 */
static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Only ICMP-flagged XFRM states may bypass the policy. */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the quoted
		 * packet for the reverse policy check, then restore it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/*
		 *	Drop through to notify
		 */
		/* fall through */

	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}
944 
945 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
946 		      u8 type,
947 		      const struct in6_addr *saddr,
948 		      const struct in6_addr *daddr,
949 		      int oif)
950 {
951 	memset(fl6, 0, sizeof(*fl6));
952 	fl6->saddr = *saddr;
953 	fl6->daddr = *daddr;
954 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
955 	fl6->fl6_icmp_type	= type;
956 	fl6->fl6_icmp_code	= 0;
957 	fl6->flowi6_oif		= oif;
958 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
959 }
960 
961 static int __net_init icmpv6_sk_init(struct net *net)
962 {
963 	struct sock *sk;
964 	int err, i, j;
965 
966 	net->ipv6.icmp_sk =
967 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
968 	if (!net->ipv6.icmp_sk)
969 		return -ENOMEM;
970 
971 	for_each_possible_cpu(i) {
972 		err = inet_ctl_sock_create(&sk, PF_INET6,
973 					   SOCK_RAW, IPPROTO_ICMPV6, net);
974 		if (err < 0) {
975 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
976 			       err);
977 			goto fail;
978 		}
979 
980 		net->ipv6.icmp_sk[i] = sk;
981 
982 		/* Enough space for 2 64K ICMP packets, including
983 		 * sk_buff struct overhead.
984 		 */
985 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
986 	}
987 	return 0;
988 
989  fail:
990 	for (j = 0; j < i; j++)
991 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
992 	kfree(net->ipv6.icmp_sk);
993 	return err;
994 }
995 
996 static void __net_exit icmpv6_sk_exit(struct net *net)
997 {
998 	int i;
999 
1000 	for_each_possible_cpu(i) {
1001 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
1002 	}
1003 	kfree(net->ipv6.icmp_sk);
1004 }
1005 
/* Per-network-namespace setup/teardown of the ICMPv6 control sockets. */
static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};
1010 
/* Register the per-net control sockets, the IPPROTO_ICMPV6 protocol
 * handler and the ICMPv6 sender callback; unwinds on failure.
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}
1035 
/* Reverse of icmpv6_init(): unregister in the opposite order. */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1042 
1043 
/* Map ICMPV6_DEST_UNREACH codes (array index) to a socket errno and a
 * fatality flag; consumed by icmpv6_err_convert() below.
 */
static const struct icmp6_err {
	int err;	/* errno reported to the socket */
	int fatal;	/* non-zero: error aborts the connection */
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1077 
1078 int icmpv6_err_convert(u8 type, u8 code, int *err)
1079 {
1080 	int fatal = 0;
1081 
1082 	*err = EPROTO;
1083 
1084 	switch (type) {
1085 	case ICMPV6_DEST_UNREACH:
1086 		fatal = 1;
1087 		if (code < ARRAY_SIZE(tab_unreach)) {
1088 			*err  = tab_unreach[code].err;
1089 			fatal = tab_unreach[code].fatal;
1090 		}
1091 		break;
1092 
1093 	case ICMPV6_PKT_TOOBIG:
1094 		*err = EMSGSIZE;
1095 		break;
1096 
1097 	case ICMPV6_PARAMPROB:
1098 		*err = EPROTO;
1099 		fatal = 1;
1100 		break;
1101 
1102 	case ICMPV6_TIME_EXCEED:
1103 		*err = EHOSTUNREACH;
1104 		break;
1105 	}
1106 
1107 	return fatal;
1108 }
1109 EXPORT_SYMBOL(icmpv6_err_convert);
1110 
1111 #ifdef CONFIG_SYSCTL
/* Template for the per-net ICMPv6 "ratelimit" sysctl; .data is
 * re-pointed at each netns' own value in ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },
};
1122 
1123 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1124 {
1125 	struct ctl_table *table;
1126 
1127 	table = kmemdup(ipv6_icmp_table_template,
1128 			sizeof(ipv6_icmp_table_template),
1129 			GFP_KERNEL);
1130 
1131 	if (table)
1132 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1133 
1134 	return table;
1135 }
1136 #endif
1137