xref: /openbmc/linux/net/ipv6/icmp.c (revision d0941130)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
/* Per-cpu raw socket used to transmit locally generated ICMPv6 messages.
 * Created in icmpv6_init(); claimed under BH-disable via icmpv6_xmit_lock().
 */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 		       u8 type, u8 code, int offset, __be32 info)
76 {
77 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 	struct net *net = dev_net(skb->dev);
80 
81 	if (type == ICMPV6_PKT_TOOBIG)
82 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 	else if (type == NDISC_REDIRECT)
84 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 			     sock_net_uid(net, NULL));
86 
87 	if (!(type & ICMPV6_INFOMSG_MASK))
88 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 			ping_err(skb, offset, ntohl(info));
90 
91 	return 0;
92 }
93 
/* Forward declaration: the input handler is defined later in this file. */
static int icmpv6_rcv(struct sk_buff *skb);

/* Registration record for IPPROTO_ICMPV6, installed by icmpv6_init().
 * NOPOLICY: icmpv6_rcv() performs its own xfrm policy check.
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
101 
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105 	struct sock *sk;
106 
107 	sk = this_cpu_read(ipv6_icmp_sk);
108 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 		/* This can happen if the output path (f.e. SIT or
110 		 * ip6ip6 tunnel) signals dst_link_failure() for an
111 		 * outgoing ICMP6 packet.
112 		 */
113 		return NULL;
114 	}
115 	sock_net_set(sk, net);
116 	return sk;
117 }
118 
/* Re-point the per-cpu socket at init_net, then drop the lock taken by
 * icmpv6_xmit_lock().  Order matters: the socket must not be observed
 * unlocked while still bound to a dying netns.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 	int len = skb->len - ptr;
140 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 	__be16 frag_off;
142 
143 	if (len < 0)
144 		return true;
145 
146 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 	if (ptr < 0)
148 		return false;
149 	if (nexthdr == IPPROTO_ICMPV6) {
150 		u8 _type, *tp;
151 		tp = skb_header_pointer(skb,
152 			ptr+offsetof(struct icmp6hdr, icmp6_type),
153 			sizeof(_type), &_type);
154 
155 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
156 		 * false if this is a fragment packet with no icmp header info.
157 		 */
158 		if (!tp && frag_off != 0)
159 			return false;
160 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 			return true;
162 	}
163 	return false;
164 }
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180 	if (icmpv6_mask_allow(net, type))
181 		return true;
182 
183 	if (icmp_global_allow())
184 		return true;
185 
186 	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
187 	return false;
188 }
189 
/*
 * Check the ICMP output rate limit (per destination)
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	/* Types outside the configured ratemask are never limited. */
	if (icmpv6_mask_allow(net, type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* Per-destination token bucket lives on the inet_peer entry. */
		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	if (!res)
		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
				  ICMP6_MIB_RATELIMITHOST);
	dst_release(dst);
	return res;
}
234 
235 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
236 				  struct flowi6 *fl6)
237 {
238 	struct net *net = sock_net(sk);
239 	struct dst_entry *dst;
240 	bool res = false;
241 
242 	dst = ip6_route_output(net, sk, fl6);
243 	if (!dst->error) {
244 		struct rt6_info *rt = (struct rt6_info *)dst;
245 		struct in6_addr prefsrc;
246 
247 		rt6_get_prefsrc(rt, &prefsrc);
248 		res = !ipv6_addr_any(&prefsrc);
249 	}
250 	dst_release(dst);
251 	return res;
252 }
253 
254 /*
255  *	an inline helper for the "simple" if statement below
256  *	checks if parameter problem report is caused by an
257  *	unrecognized IPv6 option that has the Option Type
258  *	highest-order two bits set to 10
259  */
260 
261 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
262 {
263 	u8 _optval, *op;
264 
265 	offset += skb_network_offset(skb);
266 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
267 	if (!op)
268 		return true;
269 	return (*op & 0xC0) == 0x80;
270 }
271 
/* Fill in the ICMPv6 header (@thdr) and its checksum for the frames
 * queued on @sk's write queue, then transmit them.  @len is the ICMPv6
 * length (header included) fed into the pseudo-header checksum.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single frame: fold the header into its partial checksum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple frames: sum all per-skb checksums first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}
309 
/* Context handed from icmp6_send()/icmpv6_echo_reply() to icmpv6_getfrag(). */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet the payload is copied from */
	int		offset;	/* start of the payload within skb */
	uint8_t		type;	/* outgoing ICMPv6 message type */
};
315 
316 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
317 {
318 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
319 	struct sk_buff *org_skb = msg->skb;
320 	__wsum csum;
321 
322 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
323 				      to, len);
324 	skb->csum = csum_block_add(skb->csum, csum, odd);
325 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
326 		nf_ct_attach(skb, org_skb);
327 	return 0;
328 }
329 
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: if the offending packet carried a Home Address
 * destination option, swap the IPv6 source address with the address
 * stored in the HAO before building the error message.
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			/* three-way swap of iph->saddr and hao->addr */
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
352 
/* Resolve the route (and xfrm transform) for an outgoing ICMPv6 error.
 * Returns a dst_entry or an ERR_PTR().  On -EPERM from the forward xfrm
 * lookup, retries with the flow decoded in the reverse direction using
 * XFRM_LOOKUP_ICMP, mirroring the IPv4 icmp code.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;	/* transformed route found */
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;	/* fall through to reverse lookup */
		else
			return dst;
	}

	/* Either -EPERM or an untransformed route: retry with the flow
	 * decoded in the reverse direction.
	 */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
		/* falls through to the label below and returns dst */
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Keep the original (untransformed) route if we still hold one. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
416 
417 static struct net_device *icmp6_dev(const struct sk_buff *skb)
418 {
419 	struct net_device *dev = skb->dev;
420 
421 	/* for local traffic to local address, skb dev is the loopback
422 	 * device. Check if there is a dst attached to the skb and if so
423 	 * get the real device index. Same is needed for replies to a link
424 	 * local address on a device enslaved to an L3 master device
425 	 */
426 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
427 		const struct rt6_info *rt6 = skb_rt6_info(skb);
428 
429 		if (rt6)
430 			dev = rt6->rt6i_idev->dev;
431 	}
432 
433 	return dev;
434 }
435 
/* Interface index to use for replies/errors; see icmp6_dev(). */
static int icmp6_iif(const struct sk_buff *skb)
{
	return icmp6_dev(skb)->ifindex;
}
440 
/*
 *	Send an ICMP message in response to a packet in error
 *
 *	@skb:         the offending packet
 *	@type, @code: outgoing ICMPv6 type/code
 *	@info:        type-specific field (MTU, pointer, ...), host order
 *	@force_saddr: source address override (may be NULL)
 *	@parm:        inet6 control block of the offending packet
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity: a complete IPv6 header must sit inside the skb. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;
	net = dev_net(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	/* If the offending packet was addressed to one of our (unicast or
	 * anycast) addresses, answer from that address.
	 */
	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		/* Only PKT_TOOBIG, or PARAMPROB about an unrecognized
		 * option, may be sent for multicast/non-host packets.
		 */
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	/* Per-destination rate limit. */
	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipcm6_init_sk(&ipc6, np);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Clamp the quoted payload so the error fits in IPV6_MIN_MTU. */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	/* NOTE(review): min_t() compares through unsigned int, so a
	 * negative len would already have been clamped to the positive
	 * limit above — this branch looks unreachable; confirm.
	 */
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, (struct rt6_info *)dst,
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
EXPORT_SYMBOL(icmp6_send);
635 
/* Slightly more convenient version of icmp6_send with drop reasons.
 * Consumes @skb: it is freed with the given @reason after the parameter
 * problem message has been generated.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}
644 
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 *
 * Returns 0 if an ICMPv6 message was generated, 1 otherwise.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	/* Need the tunnel header plus an inner IPv6 header plus 8 bytes. */
	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* A full copy is only needed when the payload is rewritten below. */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Report the outer IPv4 source in its v4-mapped IPv6 form. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
710 
/* Answer an ICMPv6 (extended) echo request with the matching reply. */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	bool acast;
	u8 type;

	/* Honour the sysctls suppressing multicast/anycast echo replies. */
	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return;

	/* For non-unicast destinations, let routing pick the source address
	 * — unless anycast_src_echo_reply permits replying from the anycast
	 * address itself.
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	/* The reply header starts as a copy of the request header. */
	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	/* Extended echo (PROBE): build/validate the reply payload first. */
	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
818 
/* Deliver an ICMPv6 error to the protocol that originated the offending
 * packet: locate the inner transport header past any extension headers,
 * then call the matching inet6 protocol's err_handler and notify raw
 * sockets bound to that protocol.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
865 
/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	/* xfrm policy: ICMP carried inside a transform also needs a
	 * reverse check against the quoted inner packet.
	 */
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the quoted inner
		 * packet for the reverse policy check, then restore it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		/* PROBE replies additionally require the (shared with v4)
		 * icmp_echo_enable_probe sysctl.
		 */
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_EXT_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	/* MLD handlers take over the skb entirely; no free below. */
	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}
1023 
1024 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1025 		      u8 type,
1026 		      const struct in6_addr *saddr,
1027 		      const struct in6_addr *daddr,
1028 		      int oif)
1029 {
1030 	memset(fl6, 0, sizeof(*fl6));
1031 	fl6->saddr = *saddr;
1032 	fl6->daddr = *daddr;
1033 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1034 	fl6->fl6_icmp_type	= type;
1035 	fl6->fl6_icmp_code	= 0;
1036 	fl6->flowi6_oif		= oif;
1037 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1038 }
1039 
/* Boot-time setup: create one transmit socket per possible CPU, then
 * register the ICMPv6 protocol handler and the error sender.
 * NOTE(review): per-cpu sockets created so far are not torn down on an
 * early failure — presumably acceptable since a failing boot-time init
 * is fatal; confirm.
 */
int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}
1077 
/* Undo the registrations made in icmpv6_init().  The per-cpu transmit
 * sockets are not freed here.
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1083 
1084 
/* Mapping from ICMPV6_DEST_UNREACH code (array index) to the errno
 * reported to the socket and whether the error is fatal to the
 * connection.  Consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1118 
1119 int icmpv6_err_convert(u8 type, u8 code, int *err)
1120 {
1121 	int fatal = 0;
1122 
1123 	*err = EPROTO;
1124 
1125 	switch (type) {
1126 	case ICMPV6_DEST_UNREACH:
1127 		fatal = 1;
1128 		if (code < ARRAY_SIZE(tab_unreach)) {
1129 			*err  = tab_unreach[code].err;
1130 			fatal = tab_unreach[code].fatal;
1131 		}
1132 		break;
1133 
1134 	case ICMPV6_PKT_TOOBIG:
1135 		*err = EMSGSIZE;
1136 		break;
1137 
1138 	case ICMPV6_PARAMPROB:
1139 		*err = EPROTO;
1140 		fatal = 1;
1141 		break;
1142 
1143 	case ICMPV6_TIME_EXCEED:
1144 		*err = EHOSTUNREACH;
1145 		break;
1146 	}
1147 
1148 	return fatal;
1149 }
1150 EXPORT_SYMBOL(icmpv6_err_convert);
1151 
1152 #ifdef CONFIG_SYSCTL
/* Template for the per-netns net.ipv6.icmp.* sysctl table.  The .data
 * pointers reference init_net here and are rewritten for each namespace
 * in ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{ },
};
1191 
1192 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1193 {
1194 	struct ctl_table *table;
1195 
1196 	table = kmemdup(ipv6_icmp_table_template,
1197 			sizeof(ipv6_icmp_table_template),
1198 			GFP_KERNEL);
1199 
1200 	if (table) {
1201 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1202 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1203 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1204 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1205 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1206 	}
1207 	return table;
1208 }
1209 #endif
1210