xref: /openbmc/linux/net/ipv6/icmp.c (revision 545dbcd1)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 		       u8 type, u8 code, int offset, __be32 info)
76 {
77 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 	struct net *net = dev_net(skb->dev);
80 
81 	if (type == ICMPV6_PKT_TOOBIG)
82 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 	else if (type == NDISC_REDIRECT)
84 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 			     sock_net_uid(net, NULL));
86 
87 	if (!(type & ICMPV6_INFOMSG_MASK))
88 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 			ping_err(skb, offset, ntohl(info));
90 
91 	return 0;
92 }
93 
94 static int icmpv6_rcv(struct sk_buff *skb);
95 
96 static const struct inet6_protocol icmpv6_protocol = {
97 	.handler	=	icmpv6_rcv,
98 	.err_handler	=	icmpv6_err,
99 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101 
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105 	struct sock *sk;
106 
107 	sk = this_cpu_read(ipv6_icmp_sk);
108 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 		/* This can happen if the output path (f.e. SIT or
110 		 * ip6ip6 tunnel) signals dst_link_failure() for an
111 		 * outgoing ICMP6 packet.
112 		 */
113 		return NULL;
114 	}
115 	sock_net_set(sk, net);
116 	return sk;
117 }
118 
119 static void icmpv6_xmit_unlock(struct sock *sk)
120 {
121 	sock_net_set(sk, &init_net);
122 	spin_unlock(&sk->sk_lock.slock);
123 }
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 	int len = skb->len - ptr;
140 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 	__be16 frag_off;
142 
143 	if (len < 0)
144 		return true;
145 
146 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 	if (ptr < 0)
148 		return false;
149 	if (nexthdr == IPPROTO_ICMPV6) {
150 		u8 _type, *tp;
151 		tp = skb_header_pointer(skb,
152 			ptr+offsetof(struct icmp6hdr, icmp6_type),
153 			sizeof(_type), &_type);
154 
155 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
156 		 * false if this is a fragment packet with no icmp header info.
157 		 */
158 		if (!tp && frag_off != 0)
159 			return false;
160 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 			return true;
162 	}
163 	return false;
164 }
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180 	if (icmpv6_mask_allow(net, type))
181 		return true;
182 
183 	if (icmp_global_allow())
184 		return true;
185 
186 	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	if (!res)
229 		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
230 				  ICMP6_MIB_RATELIMITHOST);
231 	dst_release(dst);
232 	return res;
233 }
234 
235 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
236 				  struct flowi6 *fl6)
237 {
238 	struct net *net = sock_net(sk);
239 	struct dst_entry *dst;
240 	bool res = false;
241 
242 	dst = ip6_route_output(net, sk, fl6);
243 	if (!dst->error) {
244 		struct rt6_info *rt = (struct rt6_info *)dst;
245 		struct in6_addr prefsrc;
246 
247 		rt6_get_prefsrc(rt, &prefsrc);
248 		res = !ipv6_addr_any(&prefsrc);
249 	}
250 	dst_release(dst);
251 	return res;
252 }
253 
254 /*
255  *	an inline helper for the "simple" if statement below
256  *	checks if parameter problem report is caused by an
257  *	unrecognized IPv6 option that has the Option Type
258  *	highest-order two bits set to 10
259  */
260 
261 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
262 {
263 	u8 _optval, *op;
264 
265 	offset += skb_network_offset(skb);
266 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
267 	if (!op)
268 		return true;
269 	return (*op & 0xC0) == 0x80;
270 }
271 
272 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
273 				struct icmp6hdr *thdr, int len)
274 {
275 	struct sk_buff *skb;
276 	struct icmp6hdr *icmp6h;
277 
278 	skb = skb_peek(&sk->sk_write_queue);
279 	if (!skb)
280 		return;
281 
282 	icmp6h = icmp6_hdr(skb);
283 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
284 	icmp6h->icmp6_cksum = 0;
285 
286 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
287 		skb->csum = csum_partial(icmp6h,
288 					sizeof(struct icmp6hdr), skb->csum);
289 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
290 						      &fl6->daddr,
291 						      len, fl6->flowi6_proto,
292 						      skb->csum);
293 	} else {
294 		__wsum tmp_csum = 0;
295 
296 		skb_queue_walk(&sk->sk_write_queue, skb) {
297 			tmp_csum = csum_add(tmp_csum, skb->csum);
298 		}
299 
300 		tmp_csum = csum_partial(icmp6h,
301 					sizeof(struct icmp6hdr), tmp_csum);
302 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
303 						      &fl6->daddr,
304 						      len, fl6->flowi6_proto,
305 						      tmp_csum);
306 	}
307 	ip6_push_pending_frames(sk);
308 }
309 
310 struct icmpv6_msg {
311 	struct sk_buff	*skb;
312 	int		offset;
313 	uint8_t		type;
314 };
315 
316 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
317 {
318 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
319 	struct sk_buff *org_skb = msg->skb;
320 	__wsum csum;
321 
322 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
323 				      to, len);
324 	skb->csum = csum_block_add(skb->csum, csum, odd);
325 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
326 		nf_ct_attach(skb, org_skb);
327 	return 0;
328 }
329 
330 #if IS_ENABLED(CONFIG_IPV6_MIP6)
331 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
332 {
333 	struct ipv6hdr *iph = ipv6_hdr(skb);
334 	struct ipv6_destopt_hao *hao;
335 	int off;
336 
337 	if (opt->dsthao) {
338 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
339 		if (likely(off >= 0)) {
340 			hao = (struct ipv6_destopt_hao *)
341 					(skb_network_header(skb) + off);
342 			swap(iph->saddr, hao->addr);
343 		}
344 	}
345 }
346 #else
347 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
348 #endif
349 
350 static struct dst_entry *icmpv6_route_lookup(struct net *net,
351 					     struct sk_buff *skb,
352 					     struct sock *sk,
353 					     struct flowi6 *fl6)
354 {
355 	struct dst_entry *dst, *dst2;
356 	struct flowi6 fl2;
357 	int err;
358 
359 	err = ip6_dst_lookup(net, sk, &dst, fl6);
360 	if (err)
361 		return ERR_PTR(err);
362 
363 	/*
364 	 * We won't send icmp if the destination is known
365 	 * anycast.
366 	 */
367 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
368 		net_dbg_ratelimited("icmp6_send: acast source\n");
369 		dst_release(dst);
370 		return ERR_PTR(-EINVAL);
371 	}
372 
373 	/* No need to clone since we're just using its address. */
374 	dst2 = dst;
375 
376 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
377 	if (!IS_ERR(dst)) {
378 		if (dst != dst2)
379 			return dst;
380 	} else {
381 		if (PTR_ERR(dst) == -EPERM)
382 			dst = NULL;
383 		else
384 			return dst;
385 	}
386 
387 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
388 	if (err)
389 		goto relookup_failed;
390 
391 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
392 	if (err)
393 		goto relookup_failed;
394 
395 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
396 	if (!IS_ERR(dst2)) {
397 		dst_release(dst);
398 		dst = dst2;
399 	} else {
400 		err = PTR_ERR(dst2);
401 		if (err == -EPERM) {
402 			dst_release(dst);
403 			return dst2;
404 		} else
405 			goto relookup_failed;
406 	}
407 
408 relookup_failed:
409 	if (dst)
410 		return dst;
411 	return ERR_PTR(err);
412 }
413 
414 static struct net_device *icmp6_dev(const struct sk_buff *skb)
415 {
416 	struct net_device *dev = skb->dev;
417 
418 	/* for local traffic to local address, skb dev is the loopback
419 	 * device. Check if there is a dst attached to the skb and if so
420 	 * get the real device index. Same is needed for replies to a link
421 	 * local address on a device enslaved to an L3 master device
422 	 */
423 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
424 		const struct rt6_info *rt6 = skb_rt6_info(skb);
425 
426 		if (rt6)
427 			dev = rt6->rt6i_idev->dev;
428 	}
429 
430 	return dev;
431 }
432 
433 static int icmp6_iif(const struct sk_buff *skb)
434 {
435 	return icmp6_dev(skb)->ifindex;
436 }
437 
438 /*
439  *	Send an ICMP message in response to a packet in error
440  */
441 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
442 		const struct in6_addr *force_saddr,
443 		const struct inet6_skb_parm *parm)
444 {
445 	struct inet6_dev *idev = NULL;
446 	struct ipv6hdr *hdr = ipv6_hdr(skb);
447 	struct sock *sk;
448 	struct net *net;
449 	struct ipv6_pinfo *np;
450 	const struct in6_addr *saddr = NULL;
451 	struct dst_entry *dst;
452 	struct icmp6hdr tmp_hdr;
453 	struct flowi6 fl6;
454 	struct icmpv6_msg msg;
455 	struct ipcm6_cookie ipc6;
456 	int iif = 0;
457 	int addr_type = 0;
458 	int len;
459 	u32 mark;
460 
461 	if ((u8 *)hdr < skb->head ||
462 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
463 		return;
464 
465 	if (!skb->dev)
466 		return;
467 	net = dev_net(skb->dev);
468 	mark = IP6_REPLY_MARK(net, skb->mark);
469 	/*
470 	 *	Make sure we respect the rules
471 	 *	i.e. RFC 1885 2.4(e)
472 	 *	Rule (e.1) is enforced by not using icmp6_send
473 	 *	in any code that processes icmp errors.
474 	 */
475 	addr_type = ipv6_addr_type(&hdr->daddr);
476 
477 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
478 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
479 		saddr = &hdr->daddr;
480 
481 	/*
482 	 *	Dest addr check
483 	 */
484 
485 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
486 		if (type != ICMPV6_PKT_TOOBIG &&
487 		    !(type == ICMPV6_PARAMPROB &&
488 		      code == ICMPV6_UNK_OPTION &&
489 		      (opt_unrec(skb, info))))
490 			return;
491 
492 		saddr = NULL;
493 	}
494 
495 	addr_type = ipv6_addr_type(&hdr->saddr);
496 
497 	/*
498 	 *	Source addr check
499 	 */
500 
501 	if (__ipv6_addr_needs_scope_id(addr_type)) {
502 		iif = icmp6_iif(skb);
503 	} else {
504 		/*
505 		 * The source device is used for looking up which routing table
506 		 * to use for sending an ICMP error.
507 		 */
508 		iif = l3mdev_master_ifindex(skb->dev);
509 	}
510 
511 	/*
512 	 *	Must not send error if the source does not uniquely
513 	 *	identify a single node (RFC2463 Section 2.4).
514 	 *	We check unspecified / multicast addresses here,
515 	 *	and anycast addresses will be checked later.
516 	 */
517 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
518 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
519 				    &hdr->saddr, &hdr->daddr);
520 		return;
521 	}
522 
523 	/*
524 	 *	Never answer to a ICMP packet.
525 	 */
526 	if (is_ineligible(skb)) {
527 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
528 				    &hdr->saddr, &hdr->daddr);
529 		return;
530 	}
531 
532 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
533 	local_bh_disable();
534 
535 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
536 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
537 		goto out_bh_enable;
538 
539 	mip6_addr_swap(skb, parm);
540 
541 	sk = icmpv6_xmit_lock(net);
542 	if (!sk)
543 		goto out_bh_enable;
544 
545 	memset(&fl6, 0, sizeof(fl6));
546 	fl6.flowi6_proto = IPPROTO_ICMPV6;
547 	fl6.daddr = hdr->saddr;
548 	if (force_saddr)
549 		saddr = force_saddr;
550 	if (saddr) {
551 		fl6.saddr = *saddr;
552 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
553 		/* select a more meaningful saddr from input if */
554 		struct net_device *in_netdev;
555 
556 		in_netdev = dev_get_by_index(net, parm->iif);
557 		if (in_netdev) {
558 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
559 					   inet6_sk(sk)->srcprefs,
560 					   &fl6.saddr);
561 			dev_put(in_netdev);
562 		}
563 	}
564 	fl6.flowi6_mark = mark;
565 	fl6.flowi6_oif = iif;
566 	fl6.fl6_icmp_type = type;
567 	fl6.fl6_icmp_code = code;
568 	fl6.flowi6_uid = sock_net_uid(net, NULL);
569 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
570 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
571 
572 	np = inet6_sk(sk);
573 
574 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
575 		goto out;
576 
577 	tmp_hdr.icmp6_type = type;
578 	tmp_hdr.icmp6_code = code;
579 	tmp_hdr.icmp6_cksum = 0;
580 	tmp_hdr.icmp6_pointer = htonl(info);
581 
582 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
583 		fl6.flowi6_oif = np->mcast_oif;
584 	else if (!fl6.flowi6_oif)
585 		fl6.flowi6_oif = np->ucast_oif;
586 
587 	ipcm6_init_sk(&ipc6, np);
588 	ipc6.sockc.mark = mark;
589 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
590 
591 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
592 	if (IS_ERR(dst))
593 		goto out;
594 
595 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
596 
597 	msg.skb = skb;
598 	msg.offset = skb_network_offset(skb);
599 	msg.type = type;
600 
601 	len = skb->len - msg.offset;
602 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
603 	if (len < 0) {
604 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
605 				    &hdr->saddr, &hdr->daddr);
606 		goto out_dst_release;
607 	}
608 
609 	rcu_read_lock();
610 	idev = __in6_dev_get(skb->dev);
611 
612 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
613 			    len + sizeof(struct icmp6hdr),
614 			    sizeof(struct icmp6hdr),
615 			    &ipc6, &fl6, (struct rt6_info *)dst,
616 			    MSG_DONTWAIT)) {
617 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
618 		ip6_flush_pending_frames(sk);
619 	} else {
620 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
621 					   len + sizeof(struct icmp6hdr));
622 	}
623 	rcu_read_unlock();
624 out_dst_release:
625 	dst_release(dst);
626 out:
627 	icmpv6_xmit_unlock(sk);
628 out_bh_enable:
629 	local_bh_enable();
630 }
631 EXPORT_SYMBOL(icmp6_send);
632 
633 /* Slightly more convenient version of icmp6_send with drop reasons.
634  */
635 void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
636 			      enum skb_drop_reason reason)
637 {
638 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
639 	kfree_skb_reason(skb, reason);
640 }
641 
642 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
643  * if sufficient data bytes are available
644  * @nhs is the size of the tunnel header(s) :
645  *  Either an IPv4 header for SIT encap
646  *         an IPv4 header + GRE header for GRE encap
647  */
648 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
649 			       unsigned int data_len)
650 {
651 	struct in6_addr temp_saddr;
652 	struct rt6_info *rt;
653 	struct sk_buff *skb2;
654 	u32 info = 0;
655 
656 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
657 		return 1;
658 
659 	/* RFC 4884 (partial) support for ICMP extensions */
660 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
661 		data_len = 0;
662 
663 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
664 
665 	if (!skb2)
666 		return 1;
667 
668 	skb_dst_drop(skb2);
669 	skb_pull(skb2, nhs);
670 	skb_reset_network_header(skb2);
671 
672 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
673 			skb, 0);
674 
675 	if (rt && rt->dst.dev)
676 		skb2->dev = rt->dst.dev;
677 
678 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
679 
680 	if (data_len) {
681 		/* RFC 4884 (partial) support :
682 		 * insert 0 padding at the end, before the extensions
683 		 */
684 		__skb_push(skb2, nhs);
685 		skb_reset_network_header(skb2);
686 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
687 		memset(skb2->data + data_len - nhs, 0, nhs);
688 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
689 		 * and stored in reserved[0]
690 		 */
691 		info = (data_len/8) << 24;
692 	}
693 	if (type == ICMP_TIME_EXCEEDED)
694 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
695 			   info, &temp_saddr, IP6CB(skb2));
696 	else
697 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
698 			   info, &temp_saddr, IP6CB(skb2));
699 	if (rt)
700 		ip6_rt_put(rt);
701 
702 	kfree_skb(skb2);
703 
704 	return 0;
705 }
706 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
707 
708 static void icmpv6_echo_reply(struct sk_buff *skb)
709 {
710 	struct net *net = dev_net(skb->dev);
711 	struct sock *sk;
712 	struct inet6_dev *idev;
713 	struct ipv6_pinfo *np;
714 	const struct in6_addr *saddr = NULL;
715 	struct icmp6hdr *icmph = icmp6_hdr(skb);
716 	struct icmp6hdr tmp_hdr;
717 	struct flowi6 fl6;
718 	struct icmpv6_msg msg;
719 	struct dst_entry *dst;
720 	struct ipcm6_cookie ipc6;
721 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
722 	bool acast;
723 	u8 type;
724 
725 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
726 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
727 		return;
728 
729 	saddr = &ipv6_hdr(skb)->daddr;
730 
731 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
732 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
733 		return;
734 
735 	if (!ipv6_unicast_destination(skb) &&
736 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
737 		saddr = NULL;
738 
739 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
740 		type = ICMPV6_EXT_ECHO_REPLY;
741 	else
742 		type = ICMPV6_ECHO_REPLY;
743 
744 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
745 	tmp_hdr.icmp6_type = type;
746 
747 	memset(&fl6, 0, sizeof(fl6));
748 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
749 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
750 
751 	fl6.flowi6_proto = IPPROTO_ICMPV6;
752 	fl6.daddr = ipv6_hdr(skb)->saddr;
753 	if (saddr)
754 		fl6.saddr = *saddr;
755 	fl6.flowi6_oif = icmp6_iif(skb);
756 	fl6.fl6_icmp_type = type;
757 	fl6.flowi6_mark = mark;
758 	fl6.flowi6_uid = sock_net_uid(net, NULL);
759 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
760 
761 	local_bh_disable();
762 	sk = icmpv6_xmit_lock(net);
763 	if (!sk)
764 		goto out_bh_enable;
765 	np = inet6_sk(sk);
766 
767 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
768 		fl6.flowi6_oif = np->mcast_oif;
769 	else if (!fl6.flowi6_oif)
770 		fl6.flowi6_oif = np->ucast_oif;
771 
772 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
773 		goto out;
774 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
775 	if (IS_ERR(dst))
776 		goto out;
777 
778 	/* Check the ratelimit */
779 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
780 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
781 		goto out_dst_release;
782 
783 	idev = __in6_dev_get(skb->dev);
784 
785 	msg.skb = skb;
786 	msg.offset = 0;
787 	msg.type = type;
788 
789 	ipcm6_init_sk(&ipc6, np);
790 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
791 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
792 	ipc6.sockc.mark = mark;
793 
794 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
795 		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
796 			goto out_dst_release;
797 
798 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
799 			    skb->len + sizeof(struct icmp6hdr),
800 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
801 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
802 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
803 		ip6_flush_pending_frames(sk);
804 	} else {
805 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
806 					   skb->len + sizeof(struct icmp6hdr));
807 	}
808 out_dst_release:
809 	dst_release(dst);
810 out:
811 	icmpv6_xmit_unlock(sk);
812 out_bh_enable:
813 	local_bh_enable();
814 }
815 
816 enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
817 				   u8 code, __be32 info)
818 {
819 	struct inet6_skb_parm *opt = IP6CB(skb);
820 	struct net *net = dev_net(skb->dev);
821 	const struct inet6_protocol *ipprot;
822 	enum skb_drop_reason reason;
823 	int inner_offset;
824 	__be16 frag_off;
825 	u8 nexthdr;
826 
827 	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
828 	if (reason != SKB_NOT_DROPPED_YET)
829 		goto out;
830 
831 	seg6_icmp_srh(skb, opt);
832 
833 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
834 	if (ipv6_ext_hdr(nexthdr)) {
835 		/* now skip over extension headers */
836 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
837 						&nexthdr, &frag_off);
838 		if (inner_offset < 0) {
839 			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
840 			goto out;
841 		}
842 	} else {
843 		inner_offset = sizeof(struct ipv6hdr);
844 	}
845 
846 	/* Checkin header including 8 bytes of inner protocol header. */
847 	reason = pskb_may_pull_reason(skb, inner_offset + 8);
848 	if (reason != SKB_NOT_DROPPED_YET)
849 		goto out;
850 
851 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
852 	   Without this we will not able f.e. to make source routed
853 	   pmtu discovery.
854 	   Corresponding argument (opt) to notifiers is already added.
855 	   --ANK (980726)
856 	 */
857 
858 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
859 	if (ipprot && ipprot->err_handler)
860 		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
861 
862 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
863 	return SKB_CONSUMED;
864 
865 out:
866 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
867 	return reason;
868 }
869 
870 /*
871  *	Handle icmp messages
872  */
873 
874 static int icmpv6_rcv(struct sk_buff *skb)
875 {
876 	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
877 	struct net *net = dev_net(skb->dev);
878 	struct net_device *dev = icmp6_dev(skb);
879 	struct inet6_dev *idev = __in6_dev_get(dev);
880 	const struct in6_addr *saddr, *daddr;
881 	struct icmp6hdr *hdr;
882 	u8 type;
883 
884 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
885 		struct sec_path *sp = skb_sec_path(skb);
886 		int nh;
887 
888 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
889 				 XFRM_STATE_ICMP)) {
890 			reason = SKB_DROP_REASON_XFRM_POLICY;
891 			goto drop_no_count;
892 		}
893 
894 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
895 			goto drop_no_count;
896 
897 		nh = skb_network_offset(skb);
898 		skb_set_network_header(skb, sizeof(*hdr));
899 
900 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
901 						skb)) {
902 			reason = SKB_DROP_REASON_XFRM_POLICY;
903 			goto drop_no_count;
904 		}
905 
906 		skb_set_network_header(skb, nh);
907 	}
908 
909 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
910 
911 	saddr = &ipv6_hdr(skb)->saddr;
912 	daddr = &ipv6_hdr(skb)->daddr;
913 
914 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
915 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
916 				    saddr, daddr);
917 		goto csum_error;
918 	}
919 
920 	if (!pskb_pull(skb, sizeof(*hdr)))
921 		goto discard_it;
922 
923 	hdr = icmp6_hdr(skb);
924 
925 	type = hdr->icmp6_type;
926 
927 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
928 
929 	switch (type) {
930 	case ICMPV6_ECHO_REQUEST:
931 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
932 			icmpv6_echo_reply(skb);
933 		break;
934 	case ICMPV6_EXT_ECHO_REQUEST:
935 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
936 		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
937 			icmpv6_echo_reply(skb);
938 		break;
939 
940 	case ICMPV6_ECHO_REPLY:
941 		reason = ping_rcv(skb);
942 		break;
943 
944 	case ICMPV6_EXT_ECHO_REPLY:
945 		reason = ping_rcv(skb);
946 		break;
947 
948 	case ICMPV6_PKT_TOOBIG:
949 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
950 		   standard destination cache. Seems, only "advanced"
951 		   destination cache will allow to solve this problem
952 		   --ANK (980726)
953 		 */
954 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
955 			goto discard_it;
956 		hdr = icmp6_hdr(skb);
957 
958 		/* to notify */
959 		fallthrough;
960 	case ICMPV6_DEST_UNREACH:
961 	case ICMPV6_TIME_EXCEED:
962 	case ICMPV6_PARAMPROB:
963 		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
964 				       hdr->icmp6_mtu);
965 		break;
966 
967 	case NDISC_ROUTER_SOLICITATION:
968 	case NDISC_ROUTER_ADVERTISEMENT:
969 	case NDISC_NEIGHBOUR_SOLICITATION:
970 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
971 	case NDISC_REDIRECT:
972 		reason = ndisc_rcv(skb);
973 		break;
974 
975 	case ICMPV6_MGM_QUERY:
976 		igmp6_event_query(skb);
977 		return 0;
978 
979 	case ICMPV6_MGM_REPORT:
980 		igmp6_event_report(skb);
981 		return 0;
982 
983 	case ICMPV6_MGM_REDUCTION:
984 	case ICMPV6_NI_QUERY:
985 	case ICMPV6_NI_REPLY:
986 	case ICMPV6_MLD2_REPORT:
987 	case ICMPV6_DHAAD_REQUEST:
988 	case ICMPV6_DHAAD_REPLY:
989 	case ICMPV6_MOBILE_PREFIX_SOL:
990 	case ICMPV6_MOBILE_PREFIX_ADV:
991 		break;
992 
993 	default:
994 		/* informational */
995 		if (type & ICMPV6_INFOMSG_MASK)
996 			break;
997 
998 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
999 				    saddr, daddr);
1000 
1001 		/*
1002 		 * error of unknown type.
1003 		 * must pass to upper level
1004 		 */
1005 
1006 		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
1007 				       hdr->icmp6_mtu);
1008 	}
1009 
1010 	/* until the v6 path can be better sorted assume failure and
1011 	 * preserve the status quo behaviour for the rest of the paths to here
1012 	 */
1013 	if (reason)
1014 		kfree_skb_reason(skb, reason);
1015 	else
1016 		consume_skb(skb);
1017 
1018 	return 0;
1019 
1020 csum_error:
1021 	reason = SKB_DROP_REASON_ICMP_CSUM;
1022 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1023 discard_it:
1024 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1025 drop_no_count:
1026 	kfree_skb_reason(skb, reason);
1027 	return 0;
1028 }
1029 
1030 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1031 		      u8 type,
1032 		      const struct in6_addr *saddr,
1033 		      const struct in6_addr *daddr,
1034 		      int oif)
1035 {
1036 	memset(fl6, 0, sizeof(*fl6));
1037 	fl6->saddr = *saddr;
1038 	fl6->daddr = *daddr;
1039 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1040 	fl6->fl6_icmp_type	= type;
1041 	fl6->fl6_icmp_code	= 0;
1042 	fl6->flowi6_oif		= oif;
1043 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1044 }
1045 
1046 int __init icmpv6_init(void)
1047 {
1048 	struct sock *sk;
1049 	int err, i;
1050 
1051 	for_each_possible_cpu(i) {
1052 		err = inet_ctl_sock_create(&sk, PF_INET6,
1053 					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1054 		if (err < 0) {
1055 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1056 			       err);
1057 			return err;
1058 		}
1059 
1060 		per_cpu(ipv6_icmp_sk, i) = sk;
1061 
1062 		/* Enough space for 2 64K ICMP packets, including
1063 		 * sk_buff struct overhead.
1064 		 */
1065 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1066 	}
1067 
1068 	err = -EAGAIN;
1069 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1070 		goto fail;
1071 
1072 	err = inet6_register_icmp_sender(icmp6_send);
1073 	if (err)
1074 		goto sender_reg_err;
1075 	return 0;
1076 
1077 sender_reg_err:
1078 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1079 fail:
1080 	pr_err("Failed to register ICMP6 protocol\n");
1081 	return err;
1082 }
1083 
1084 void icmpv6_cleanup(void)
1085 {
1086 	inet6_unregister_icmp_sender(icmp6_send);
1087 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1088 }
1089 
1090 
/*
 * errno / fatality for each ICMPV6_DEST_UNREACH code, indexed by code.
 * Consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1124 
1125 int icmpv6_err_convert(u8 type, u8 code, int *err)
1126 {
1127 	int fatal = 0;
1128 
1129 	*err = EPROTO;
1130 
1131 	switch (type) {
1132 	case ICMPV6_DEST_UNREACH:
1133 		fatal = 1;
1134 		if (code < ARRAY_SIZE(tab_unreach)) {
1135 			*err  = tab_unreach[code].err;
1136 			fatal = tab_unreach[code].fatal;
1137 		}
1138 		break;
1139 
1140 	case ICMPV6_PKT_TOOBIG:
1141 		*err = EMSGSIZE;
1142 		break;
1143 
1144 	case ICMPV6_PARAMPROB:
1145 		*err = EPROTO;
1146 		fatal = 1;
1147 		break;
1148 
1149 	case ICMPV6_TIME_EXCEED:
1150 		*err = EHOSTUNREACH;
1151 		break;
1152 	}
1153 
1154 	return fatal;
1155 }
1156 EXPORT_SYMBOL(icmpv6_err_convert);
1157 
1158 #ifdef CONFIG_SYSCTL
1159 static struct ctl_table ipv6_icmp_table_template[] = {
1160 	{
1161 		.procname	= "ratelimit",
1162 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1163 		.maxlen		= sizeof(int),
1164 		.mode		= 0644,
1165 		.proc_handler	= proc_dointvec_ms_jiffies,
1166 	},
1167 	{
1168 		.procname	= "echo_ignore_all",
1169 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1170 		.maxlen		= sizeof(u8),
1171 		.mode		= 0644,
1172 		.proc_handler = proc_dou8vec_minmax,
1173 	},
1174 	{
1175 		.procname	= "echo_ignore_multicast",
1176 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1177 		.maxlen		= sizeof(u8),
1178 		.mode		= 0644,
1179 		.proc_handler = proc_dou8vec_minmax,
1180 	},
1181 	{
1182 		.procname	= "echo_ignore_anycast",
1183 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1184 		.maxlen		= sizeof(u8),
1185 		.mode		= 0644,
1186 		.proc_handler = proc_dou8vec_minmax,
1187 	},
1188 	{
1189 		.procname	= "ratemask",
1190 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1191 		.maxlen		= ICMPV6_MSG_MAX + 1,
1192 		.mode		= 0644,
1193 		.proc_handler = proc_do_large_bitmap,
1194 	},
1195 	{ },
1196 };
1197 
1198 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1199 {
1200 	struct ctl_table *table;
1201 
1202 	table = kmemdup(ipv6_icmp_table_template,
1203 			sizeof(ipv6_icmp_table_template),
1204 			GFP_KERNEL);
1205 
1206 	if (table) {
1207 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1208 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1209 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1210 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1211 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1212 	}
1213 	return table;
1214 }
1215 #endif
1216