xref: /openbmc/linux/net/ipv6/icmp.c (revision 4a2f7083)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
/* Per-CPU ICMPv6 transmit socket: filled in for every possible CPU by
 * icmpv6_init() and fetched with this_cpu_read() in icmpv6_xmit_lock().
 */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 		       u8 type, u8 code, int offset, __be32 info)
76 {
77 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 	struct net *net = dev_net(skb->dev);
80 
81 	if (type == ICMPV6_PKT_TOOBIG)
82 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 	else if (type == NDISC_REDIRECT)
84 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 			     sock_net_uid(net, NULL));
86 
87 	if (!(type & ICMPV6_INFOMSG_MASK))
88 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 			ping_err(skb, offset, ntohl(info));
90 
91 	return 0;
92 }
93 
94 static int icmpv6_rcv(struct sk_buff *skb);
95 
96 static const struct inet6_protocol icmpv6_protocol = {
97 	.handler	=	icmpv6_rcv,
98 	.err_handler	=	icmpv6_err,
99 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101 
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105 	struct sock *sk;
106 
107 	sk = this_cpu_read(ipv6_icmp_sk);
108 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 		/* This can happen if the output path (f.e. SIT or
110 		 * ip6ip6 tunnel) signals dst_link_failure() for an
111 		 * outgoing ICMP6 packet.
112 		 */
113 		return NULL;
114 	}
115 	sock_net_set(sk, net);
116 	return sk;
117 }
118 
/* Release a socket obtained from icmpv6_xmit_lock(): point its network
 * namespace back at init_net, then drop the socket spinlock.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 	int len = skb->len - ptr;
140 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 	__be16 frag_off;
142 
143 	if (len < 0)
144 		return true;
145 
146 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 	if (ptr < 0)
148 		return false;
149 	if (nexthdr == IPPROTO_ICMPV6) {
150 		u8 _type, *tp;
151 		tp = skb_header_pointer(skb,
152 			ptr+offsetof(struct icmp6hdr, icmp6_type),
153 			sizeof(_type), &_type);
154 
155 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
156 		 * false if this is a fragment packet with no icmp header info.
157 		 */
158 		if (!tp && frag_off != 0)
159 			return false;
160 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 			return true;
162 	}
163 	return false;
164 }
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180 	if (icmpv6_mask_allow(net, type))
181 		return true;
182 
183 	if (icmp_global_allow())
184 		return true;
185 
186 	return false;
187 }
188 
189 /*
190  * Check the ICMP output rate limit
191  */
192 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
193 			       struct flowi6 *fl6)
194 {
195 	struct net *net = sock_net(sk);
196 	struct dst_entry *dst;
197 	bool res = false;
198 
199 	if (icmpv6_mask_allow(net, type))
200 		return true;
201 
202 	/*
203 	 * Look up the output route.
204 	 * XXX: perhaps the expire for routing entries cloned by
205 	 * this lookup should be more aggressive (not longer than timeout).
206 	 */
207 	dst = ip6_route_output(net, sk, fl6);
208 	if (dst->error) {
209 		IP6_INC_STATS(net, ip6_dst_idev(dst),
210 			      IPSTATS_MIB_OUTNOROUTES);
211 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
212 		res = true;
213 	} else {
214 		struct rt6_info *rt = (struct rt6_info *)dst;
215 		int tmo = net->ipv6.sysctl.icmpv6_time;
216 		struct inet_peer *peer;
217 
218 		/* Give more bandwidth to wider prefixes. */
219 		if (rt->rt6i_dst.plen < 128)
220 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
221 
222 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
223 		res = inet_peer_xrlim_allow(peer, tmo);
224 		if (peer)
225 			inet_putpeer(peer);
226 	}
227 	dst_release(dst);
228 	return res;
229 }
230 
231 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
232 				  struct flowi6 *fl6)
233 {
234 	struct net *net = sock_net(sk);
235 	struct dst_entry *dst;
236 	bool res = false;
237 
238 	dst = ip6_route_output(net, sk, fl6);
239 	if (!dst->error) {
240 		struct rt6_info *rt = (struct rt6_info *)dst;
241 		struct in6_addr prefsrc;
242 
243 		rt6_get_prefsrc(rt, &prefsrc);
244 		res = !ipv6_addr_any(&prefsrc);
245 	}
246 	dst_release(dst);
247 	return res;
248 }
249 
250 /*
251  *	an inline helper for the "simple" if statement below
252  *	checks if parameter problem report is caused by an
253  *	unrecognized IPv6 option that has the Option Type
254  *	highest-order two bits set to 10
255  */
256 
257 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
258 {
259 	u8 _optval, *op;
260 
261 	offset += skb_network_offset(skb);
262 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
263 	if (!op)
264 		return true;
265 	return (*op & 0xC0) == 0x80;
266 }
267 
268 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
269 				struct icmp6hdr *thdr, int len)
270 {
271 	struct sk_buff *skb;
272 	struct icmp6hdr *icmp6h;
273 
274 	skb = skb_peek(&sk->sk_write_queue);
275 	if (!skb)
276 		return;
277 
278 	icmp6h = icmp6_hdr(skb);
279 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
280 	icmp6h->icmp6_cksum = 0;
281 
282 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
283 		skb->csum = csum_partial(icmp6h,
284 					sizeof(struct icmp6hdr), skb->csum);
285 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
286 						      &fl6->daddr,
287 						      len, fl6->flowi6_proto,
288 						      skb->csum);
289 	} else {
290 		__wsum tmp_csum = 0;
291 
292 		skb_queue_walk(&sk->sk_write_queue, skb) {
293 			tmp_csum = csum_add(tmp_csum, skb->csum);
294 		}
295 
296 		tmp_csum = csum_partial(icmp6h,
297 					sizeof(struct icmp6hdr), tmp_csum);
298 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
299 						      &fl6->daddr,
300 						      len, fl6->flowi6_proto,
301 						      tmp_csum);
302 	}
303 	ip6_push_pending_frames(sk);
304 }
305 
/* Context handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
	struct sk_buff	*skb;		/* packet whose bytes are copied */
	int		offset;		/* start offset of the copy in skb */
	uint8_t		type;		/* ICMPv6 type of the message being built */
};
311 
312 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
313 {
314 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
315 	struct sk_buff *org_skb = msg->skb;
316 	__wsum csum;
317 
318 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
319 				      to, len);
320 	skb->csum = csum_block_add(skb->csum, csum, odd);
321 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
322 		nf_ct_attach(skb, org_skb);
323 	return 0;
324 }
325 
326 #if IS_ENABLED(CONFIG_IPV6_MIP6)
327 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
328 {
329 	struct ipv6hdr *iph = ipv6_hdr(skb);
330 	struct ipv6_destopt_hao *hao;
331 	struct in6_addr tmp;
332 	int off;
333 
334 	if (opt->dsthao) {
335 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
336 		if (likely(off >= 0)) {
337 			hao = (struct ipv6_destopt_hao *)
338 					(skb_network_header(skb) + off);
339 			tmp = iph->saddr;
340 			iph->saddr = hao->addr;
341 			hao->addr = tmp;
342 		}
343 	}
344 }
345 #else
346 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
347 #endif
348 
/*
 * Look up the route used to send an ICMPv6 message described by @fl6 in
 * response to @skb.
 *
 * Returns a dst_entry (to be released by the caller) or an ERR_PTR():
 *  - the ip6_dst_lookup() error when the plain route lookup fails;
 *  - -EINVAL when the destination is a known anycast address;
 *  - an xfrm_lookup() error other than -EPERM from the first lookup;
 *  - -EPERM when the second (reverse-flow) xfrm lookup is refused;
 *  - otherwise the last error seen, if no usable dst remains.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	/* First attempt: xfrm lookup on the forward flow.  A dst different
	 * from the plain route is returned right away; an unchanged dst or
	 * an -EPERM result falls through to the reverse-flow lookup below.
	 */
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	/* Second attempt: decode the reverse flow of the offending packet
	 * into fl2 and look it up with XFRM_LOOKUP_ICMP.
	 */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		/* Prefer the reverse-flow result over the first one. */
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

	/* Also reached on success: hand back whichever dst we ended up with. */
relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}
412 
413 static struct net_device *icmp6_dev(const struct sk_buff *skb)
414 {
415 	struct net_device *dev = skb->dev;
416 
417 	/* for local traffic to local address, skb dev is the loopback
418 	 * device. Check if there is a dst attached to the skb and if so
419 	 * get the real device index. Same is needed for replies to a link
420 	 * local address on a device enslaved to an L3 master device
421 	 */
422 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
423 		const struct rt6_info *rt6 = skb_rt6_info(skb);
424 
425 		if (rt6)
426 			dev = rt6->rt6i_idev->dev;
427 	}
428 
429 	return dev;
430 }
431 
432 static int icmp6_iif(const struct sk_buff *skb)
433 {
434 	return icmp6_dev(skb)->ifindex;
435 }
436 
437 /*
438  *	Send an ICMP message in response to a packet in error
439  */
440 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
441 		const struct in6_addr *force_saddr,
442 		const struct inet6_skb_parm *parm)
443 {
444 	struct inet6_dev *idev = NULL;
445 	struct ipv6hdr *hdr = ipv6_hdr(skb);
446 	struct sock *sk;
447 	struct net *net;
448 	struct ipv6_pinfo *np;
449 	const struct in6_addr *saddr = NULL;
450 	struct dst_entry *dst;
451 	struct icmp6hdr tmp_hdr;
452 	struct flowi6 fl6;
453 	struct icmpv6_msg msg;
454 	struct ipcm6_cookie ipc6;
455 	int iif = 0;
456 	int addr_type = 0;
457 	int len;
458 	u32 mark;
459 
460 	if ((u8 *)hdr < skb->head ||
461 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
462 		return;
463 
464 	if (!skb->dev)
465 		return;
466 	net = dev_net(skb->dev);
467 	mark = IP6_REPLY_MARK(net, skb->mark);
468 	/*
469 	 *	Make sure we respect the rules
470 	 *	i.e. RFC 1885 2.4(e)
471 	 *	Rule (e.1) is enforced by not using icmp6_send
472 	 *	in any code that processes icmp errors.
473 	 */
474 	addr_type = ipv6_addr_type(&hdr->daddr);
475 
476 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
477 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
478 		saddr = &hdr->daddr;
479 
480 	/*
481 	 *	Dest addr check
482 	 */
483 
484 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
485 		if (type != ICMPV6_PKT_TOOBIG &&
486 		    !(type == ICMPV6_PARAMPROB &&
487 		      code == ICMPV6_UNK_OPTION &&
488 		      (opt_unrec(skb, info))))
489 			return;
490 
491 		saddr = NULL;
492 	}
493 
494 	addr_type = ipv6_addr_type(&hdr->saddr);
495 
496 	/*
497 	 *	Source addr check
498 	 */
499 
500 	if (__ipv6_addr_needs_scope_id(addr_type)) {
501 		iif = icmp6_iif(skb);
502 	} else {
503 		/*
504 		 * The source device is used for looking up which routing table
505 		 * to use for sending an ICMP error.
506 		 */
507 		iif = l3mdev_master_ifindex(skb->dev);
508 	}
509 
510 	/*
511 	 *	Must not send error if the source does not uniquely
512 	 *	identify a single node (RFC2463 Section 2.4).
513 	 *	We check unspecified / multicast addresses here,
514 	 *	and anycast addresses will be checked later.
515 	 */
516 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
517 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
518 				    &hdr->saddr, &hdr->daddr);
519 		return;
520 	}
521 
522 	/*
523 	 *	Never answer to a ICMP packet.
524 	 */
525 	if (is_ineligible(skb)) {
526 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
527 				    &hdr->saddr, &hdr->daddr);
528 		return;
529 	}
530 
531 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
532 	local_bh_disable();
533 
534 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
535 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
536 		goto out_bh_enable;
537 
538 	mip6_addr_swap(skb, parm);
539 
540 	sk = icmpv6_xmit_lock(net);
541 	if (!sk)
542 		goto out_bh_enable;
543 
544 	memset(&fl6, 0, sizeof(fl6));
545 	fl6.flowi6_proto = IPPROTO_ICMPV6;
546 	fl6.daddr = hdr->saddr;
547 	if (force_saddr)
548 		saddr = force_saddr;
549 	if (saddr) {
550 		fl6.saddr = *saddr;
551 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
552 		/* select a more meaningful saddr from input if */
553 		struct net_device *in_netdev;
554 
555 		in_netdev = dev_get_by_index(net, parm->iif);
556 		if (in_netdev) {
557 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
558 					   inet6_sk(sk)->srcprefs,
559 					   &fl6.saddr);
560 			dev_put(in_netdev);
561 		}
562 	}
563 	fl6.flowi6_mark = mark;
564 	fl6.flowi6_oif = iif;
565 	fl6.fl6_icmp_type = type;
566 	fl6.fl6_icmp_code = code;
567 	fl6.flowi6_uid = sock_net_uid(net, NULL);
568 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
569 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
570 
571 	np = inet6_sk(sk);
572 
573 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
574 		goto out;
575 
576 	tmp_hdr.icmp6_type = type;
577 	tmp_hdr.icmp6_code = code;
578 	tmp_hdr.icmp6_cksum = 0;
579 	tmp_hdr.icmp6_pointer = htonl(info);
580 
581 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
582 		fl6.flowi6_oif = np->mcast_oif;
583 	else if (!fl6.flowi6_oif)
584 		fl6.flowi6_oif = np->ucast_oif;
585 
586 	ipcm6_init_sk(&ipc6, np);
587 	ipc6.sockc.mark = mark;
588 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
589 
590 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
591 	if (IS_ERR(dst))
592 		goto out;
593 
594 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
595 
596 	msg.skb = skb;
597 	msg.offset = skb_network_offset(skb);
598 	msg.type = type;
599 
600 	len = skb->len - msg.offset;
601 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
602 	if (len < 0) {
603 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
604 				    &hdr->saddr, &hdr->daddr);
605 		goto out_dst_release;
606 	}
607 
608 	rcu_read_lock();
609 	idev = __in6_dev_get(skb->dev);
610 
611 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
612 			    len + sizeof(struct icmp6hdr),
613 			    sizeof(struct icmp6hdr),
614 			    &ipc6, &fl6, (struct rt6_info *)dst,
615 			    MSG_DONTWAIT)) {
616 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
617 		ip6_flush_pending_frames(sk);
618 	} else {
619 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
620 					   len + sizeof(struct icmp6hdr));
621 	}
622 	rcu_read_unlock();
623 out_dst_release:
624 	dst_release(dst);
625 out:
626 	icmpv6_xmit_unlock(sk);
627 out_bh_enable:
628 	local_bh_enable();
629 }
630 EXPORT_SYMBOL(icmp6_send);
631 
/* Slightly more convenient version of icmp6_send with drop reasons:
 * sends an ICMPV6_PARAMPROB with @code and pointer @pos for @skb, then
 * frees @skb with drop reason @reason.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}
640 
641 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
642  * if sufficient data bytes are available
643  * @nhs is the size of the tunnel header(s) :
644  *  Either an IPv4 header for SIT encap
645  *         an IPv4 header + GRE header for GRE encap
646  */
647 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
648 			       unsigned int data_len)
649 {
650 	struct in6_addr temp_saddr;
651 	struct rt6_info *rt;
652 	struct sk_buff *skb2;
653 	u32 info = 0;
654 
655 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
656 		return 1;
657 
658 	/* RFC 4884 (partial) support for ICMP extensions */
659 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
660 		data_len = 0;
661 
662 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
663 
664 	if (!skb2)
665 		return 1;
666 
667 	skb_dst_drop(skb2);
668 	skb_pull(skb2, nhs);
669 	skb_reset_network_header(skb2);
670 
671 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
672 			skb, 0);
673 
674 	if (rt && rt->dst.dev)
675 		skb2->dev = rt->dst.dev;
676 
677 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
678 
679 	if (data_len) {
680 		/* RFC 4884 (partial) support :
681 		 * insert 0 padding at the end, before the extensions
682 		 */
683 		__skb_push(skb2, nhs);
684 		skb_reset_network_header(skb2);
685 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
686 		memset(skb2->data + data_len - nhs, 0, nhs);
687 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
688 		 * and stored in reserved[0]
689 		 */
690 		info = (data_len/8) << 24;
691 	}
692 	if (type == ICMP_TIME_EXCEEDED)
693 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
694 			   info, &temp_saddr, IP6CB(skb2));
695 	else
696 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
697 			   info, &temp_saddr, IP6CB(skb2));
698 	if (rt)
699 		ip6_rt_put(rt);
700 
701 	kfree_skb(skb2);
702 
703 	return 0;
704 }
705 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
706 
/*
 * Answer an ICMPv6 echo request (or extended echo request) held in @skb.
 *
 * The reply reuses the request's ICMPv6 header with the type replaced and
 * echoes the request payload.  No reply is sent when the relevant
 * echo_ignore_* sysctl applies, the per-CPU socket is busy, routing fails,
 * a rate limit triggers, or icmp_build_probe() rejects an extended request.
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return;

	/* By default reply from the address the request was sent to. */
	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return;

	/* For non-unicast destinations let routing pick the source, unless
	 * replying from an anycast address is enabled by sysctl.
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	/* Start from the request header; only the type changes here. */
	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* icmpv6_xmit_lock() must be called with BH disabled. */
	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	/* Reflect the traffic class of the request. */
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
814 
815 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
816 {
817 	struct inet6_skb_parm *opt = IP6CB(skb);
818 	const struct inet6_protocol *ipprot;
819 	int inner_offset;
820 	__be16 frag_off;
821 	u8 nexthdr;
822 	struct net *net = dev_net(skb->dev);
823 
824 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
825 		goto out;
826 
827 	seg6_icmp_srh(skb, opt);
828 
829 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
830 	if (ipv6_ext_hdr(nexthdr)) {
831 		/* now skip over extension headers */
832 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
833 						&nexthdr, &frag_off);
834 		if (inner_offset < 0)
835 			goto out;
836 	} else {
837 		inner_offset = sizeof(struct ipv6hdr);
838 	}
839 
840 	/* Checkin header including 8 bytes of inner protocol header. */
841 	if (!pskb_may_pull(skb, inner_offset+8))
842 		goto out;
843 
844 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
845 	   Without this we will not able f.e. to make source routed
846 	   pmtu discovery.
847 	   Corresponding argument (opt) to notifiers is already added.
848 	   --ANK (980726)
849 	 */
850 
851 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
852 	if (ipprot && ipprot->err_handler)
853 		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
854 
855 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
856 	return;
857 
858 out:
859 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
860 }
861 
/*
 *	Handle received icmp messages.
 *
 *	Receive handler registered in icmpv6_protocol.  Validates IPsec
 *	policy and the ICMPv6 checksum, updates the MIB counters and
 *	dispatches on the message type.  Always returns 0; @skb is consumed
 *	or freed here, except for the MLD query/report cases which return
 *	right after handing the skb to igmp6_event_query()/_report().
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* A failed policy check is tolerated only if the last xfrm
		 * state of the secpath has XFRM_STATE_ICMP set.
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header past the ICMPv6
		 * header for the reverse policy check, then restore it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	/* NOTE(review): these pointers are taken before pskb_pull() and
	 * pskb_may_pull() below and are used later for debug output only;
	 * confirm they cannot go stale if the skb head is reallocated.
	 */
	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		/* Extended echo additionally requires the probe sysctl. */
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_EXT_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		/* Re-read the header pointer after pskb_may_pull(). */
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		/* Returns without freeing the skb in this function. */
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		/* Returns without freeing the skb in this function. */
		igmp6_event_report(skb);
		return 0;

	/* Known types that need no processing here. */
	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

	/* Error exits: each label falls through to the ones below it. */
csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}
1019 
1020 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1021 		      u8 type,
1022 		      const struct in6_addr *saddr,
1023 		      const struct in6_addr *daddr,
1024 		      int oif)
1025 {
1026 	memset(fl6, 0, sizeof(*fl6));
1027 	fl6->saddr = *saddr;
1028 	fl6->daddr = *daddr;
1029 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1030 	fl6->fl6_icmp_type	= type;
1031 	fl6->fl6_icmp_code	= 0;
1032 	fl6->flowi6_oif		= oif;
1033 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1034 }
1035 
1036 int __init icmpv6_init(void)
1037 {
1038 	struct sock *sk;
1039 	int err, i;
1040 
1041 	for_each_possible_cpu(i) {
1042 		err = inet_ctl_sock_create(&sk, PF_INET6,
1043 					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1044 		if (err < 0) {
1045 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1046 			       err);
1047 			return err;
1048 		}
1049 
1050 		per_cpu(ipv6_icmp_sk, i) = sk;
1051 
1052 		/* Enough space for 2 64K ICMP packets, including
1053 		 * sk_buff struct overhead.
1054 		 */
1055 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1056 	}
1057 
1058 	err = -EAGAIN;
1059 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1060 		goto fail;
1061 
1062 	err = inet6_register_icmp_sender(icmp6_send);
1063 	if (err)
1064 		goto sender_reg_err;
1065 	return 0;
1066 
1067 sender_reg_err:
1068 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1069 fail:
1070 	pr_err("Failed to register ICMP6 protocol\n");
1071 	return err;
1072 }
1073 
/* Undo the registrations done by icmpv6_init(): unregister the ICMPv6
 * sender and remove the ICMPv6 protocol handlers.
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1079 
1080 
/*
 * Destination Unreachable code -> errno mapping, indexed by ICMPv6 code.
 * @fatal is returned to the caller of icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,  .fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,       .fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH, .fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH, .fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED, .fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,       .fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,       .fatal = 1 },	/* REJECT_ROUTE */
};
1114 
1115 int icmpv6_err_convert(u8 type, u8 code, int *err)
1116 {
1117 	int fatal = 0;
1118 
1119 	*err = EPROTO;
1120 
1121 	switch (type) {
1122 	case ICMPV6_DEST_UNREACH:
1123 		fatal = 1;
1124 		if (code < ARRAY_SIZE(tab_unreach)) {
1125 			*err  = tab_unreach[code].err;
1126 			fatal = tab_unreach[code].fatal;
1127 		}
1128 		break;
1129 
1130 	case ICMPV6_PKT_TOOBIG:
1131 		*err = EMSGSIZE;
1132 		break;
1133 
1134 	case ICMPV6_PARAMPROB:
1135 		*err = EPROTO;
1136 		fatal = 1;
1137 		break;
1138 
1139 	case ICMPV6_TIME_EXCEED:
1140 		*err = EHOSTUNREACH;
1141 		break;
1142 	}
1143 
1144 	return fatal;
1145 }
1146 EXPORT_SYMBOL(icmpv6_err_convert);
1147 
1148 #ifdef CONFIG_SYSCTL
/*
 * Template for the per-netns ICMPv6 sysctl table.  The .data pointers
 * reference init_net here; ipv6_icmp_sysctl_init() duplicates the table and
 * rewrites them by index, so the entry order must stay in sync with it.
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		/* index 0 */
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		/* index 1 */
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		/* index 2 */
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		/* index 3 */
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		/* index 4 */
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{ },	/* empty terminating entry */
};
1187 
1188 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1189 {
1190 	struct ctl_table *table;
1191 
1192 	table = kmemdup(ipv6_icmp_table_template,
1193 			sizeof(ipv6_icmp_table_template),
1194 			GFP_KERNEL);
1195 
1196 	if (table) {
1197 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1198 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1199 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1200 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1201 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1202 	}
1203 	return table;
1204 }
1205 #endif
1206