xref: /openbmc/linux/net/ipv6/icmp.c (revision d3402925)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 		       u8 type, u8 code, int offset, __be32 info)
76 {
77 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 	struct net *net = dev_net(skb->dev);
80 
81 	if (type == ICMPV6_PKT_TOOBIG)
82 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 	else if (type == NDISC_REDIRECT)
84 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 			     sock_net_uid(net, NULL));
86 
87 	if (!(type & ICMPV6_INFOMSG_MASK))
88 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 			ping_err(skb, offset, ntohl(info));
90 
91 	return 0;
92 }
93 
94 static int icmpv6_rcv(struct sk_buff *skb);
95 
96 static const struct inet6_protocol icmpv6_protocol = {
97 	.handler	=	icmpv6_rcv,
98 	.err_handler	=	icmpv6_err,
99 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101 
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105 	struct sock *sk;
106 
107 	sk = this_cpu_read(ipv6_icmp_sk);
108 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 		/* This can happen if the output path (f.e. SIT or
110 		 * ip6ip6 tunnel) signals dst_link_failure() for an
111 		 * outgoing ICMP6 packet.
112 		 */
113 		return NULL;
114 	}
115 	sock_net_set(sk, net);
116 	return sk;
117 }
118 
119 static void icmpv6_xmit_unlock(struct sock *sk)
120 {
121 	sock_net_set(sk, &init_net);
122 	spin_unlock(&sk->sk_lock.slock);
123 }
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 	int len = skb->len - ptr;
140 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 	__be16 frag_off;
142 
143 	if (len < 0)
144 		return true;
145 
146 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 	if (ptr < 0)
148 		return false;
149 	if (nexthdr == IPPROTO_ICMPV6) {
150 		u8 _type, *tp;
151 		tp = skb_header_pointer(skb,
152 			ptr+offsetof(struct icmp6hdr, icmp6_type),
153 			sizeof(_type), &_type);
154 
155 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
156 		 * false if this is a fragment packet with no icmp header info.
157 		 */
158 		if (!tp && frag_off != 0)
159 			return false;
160 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 			return true;
162 	}
163 	return false;
164 }
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180 	if (icmpv6_mask_allow(net, type))
181 		return true;
182 
183 	if (icmp_global_allow())
184 		return true;
185 
186 	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	if (!res)
229 		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
230 				  ICMP6_MIB_RATELIMITHOST);
231 	dst_release(dst);
232 	return res;
233 }
234 
235 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
236 				  struct flowi6 *fl6)
237 {
238 	struct net *net = sock_net(sk);
239 	struct dst_entry *dst;
240 	bool res = false;
241 
242 	dst = ip6_route_output(net, sk, fl6);
243 	if (!dst->error) {
244 		struct rt6_info *rt = (struct rt6_info *)dst;
245 		struct in6_addr prefsrc;
246 
247 		rt6_get_prefsrc(rt, &prefsrc);
248 		res = !ipv6_addr_any(&prefsrc);
249 	}
250 	dst_release(dst);
251 	return res;
252 }
253 
254 /*
255  *	an inline helper for the "simple" if statement below
256  *	checks if parameter problem report is caused by an
257  *	unrecognized IPv6 option that has the Option Type
258  *	highest-order two bits set to 10
259  */
260 
261 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
262 {
263 	u8 _optval, *op;
264 
265 	offset += skb_network_offset(skb);
266 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
267 	if (!op)
268 		return true;
269 	return (*op & 0xC0) == 0x80;
270 }
271 
272 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
273 				struct icmp6hdr *thdr, int len)
274 {
275 	struct sk_buff *skb;
276 	struct icmp6hdr *icmp6h;
277 
278 	skb = skb_peek(&sk->sk_write_queue);
279 	if (!skb)
280 		return;
281 
282 	icmp6h = icmp6_hdr(skb);
283 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
284 	icmp6h->icmp6_cksum = 0;
285 
286 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
287 		skb->csum = csum_partial(icmp6h,
288 					sizeof(struct icmp6hdr), skb->csum);
289 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
290 						      &fl6->daddr,
291 						      len, fl6->flowi6_proto,
292 						      skb->csum);
293 	} else {
294 		__wsum tmp_csum = 0;
295 
296 		skb_queue_walk(&sk->sk_write_queue, skb) {
297 			tmp_csum = csum_add(tmp_csum, skb->csum);
298 		}
299 
300 		tmp_csum = csum_partial(icmp6h,
301 					sizeof(struct icmp6hdr), tmp_csum);
302 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
303 						      &fl6->daddr,
304 						      len, fl6->flowi6_proto,
305 						      tmp_csum);
306 	}
307 	ip6_push_pending_frames(sk);
308 }
309 
310 struct icmpv6_msg {
311 	struct sk_buff	*skb;
312 	int		offset;
313 	uint8_t		type;
314 };
315 
316 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
317 {
318 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
319 	struct sk_buff *org_skb = msg->skb;
320 	__wsum csum;
321 
322 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
323 				      to, len);
324 	skb->csum = csum_block_add(skb->csum, csum, odd);
325 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
326 		nf_ct_attach(skb, org_skb);
327 	return 0;
328 }
329 
330 #if IS_ENABLED(CONFIG_IPV6_MIP6)
331 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
332 {
333 	struct ipv6hdr *iph = ipv6_hdr(skb);
334 	struct ipv6_destopt_hao *hao;
335 	int off;
336 
337 	if (opt->dsthao) {
338 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
339 		if (likely(off >= 0)) {
340 			hao = (struct ipv6_destopt_hao *)
341 					(skb_network_header(skb) + off);
342 			swap(iph->saddr, hao->addr);
343 		}
344 	}
345 }
346 #else
347 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
348 #endif
349 
350 static struct dst_entry *icmpv6_route_lookup(struct net *net,
351 					     struct sk_buff *skb,
352 					     struct sock *sk,
353 					     struct flowi6 *fl6)
354 {
355 	struct dst_entry *dst, *dst2;
356 	struct flowi6 fl2;
357 	int err;
358 
359 	err = ip6_dst_lookup(net, sk, &dst, fl6);
360 	if (err)
361 		return ERR_PTR(err);
362 
363 	/*
364 	 * We won't send icmp if the destination is known
365 	 * anycast.
366 	 */
367 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
368 		net_dbg_ratelimited("icmp6_send: acast source\n");
369 		dst_release(dst);
370 		return ERR_PTR(-EINVAL);
371 	}
372 
373 	/* No need to clone since we're just using its address. */
374 	dst2 = dst;
375 
376 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
377 	if (!IS_ERR(dst)) {
378 		if (dst != dst2)
379 			return dst;
380 	} else {
381 		if (PTR_ERR(dst) == -EPERM)
382 			dst = NULL;
383 		else
384 			return dst;
385 	}
386 
387 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
388 	if (err)
389 		goto relookup_failed;
390 
391 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
392 	if (err)
393 		goto relookup_failed;
394 
395 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
396 	if (!IS_ERR(dst2)) {
397 		dst_release(dst);
398 		dst = dst2;
399 	} else {
400 		err = PTR_ERR(dst2);
401 		if (err == -EPERM) {
402 			dst_release(dst);
403 			return dst2;
404 		} else
405 			goto relookup_failed;
406 	}
407 
408 relookup_failed:
409 	if (dst)
410 		return dst;
411 	return ERR_PTR(err);
412 }
413 
414 static struct net_device *icmp6_dev(const struct sk_buff *skb)
415 {
416 	struct net_device *dev = skb->dev;
417 
418 	/* for local traffic to local address, skb dev is the loopback
419 	 * device. Check if there is a dst attached to the skb and if so
420 	 * get the real device index. Same is needed for replies to a link
421 	 * local address on a device enslaved to an L3 master device
422 	 */
423 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
424 		const struct rt6_info *rt6 = skb_rt6_info(skb);
425 
426 		if (rt6)
427 			dev = rt6->rt6i_idev->dev;
428 	}
429 
430 	return dev;
431 }
432 
433 static int icmp6_iif(const struct sk_buff *skb)
434 {
435 	return icmp6_dev(skb)->ifindex;
436 }
437 
438 /*
439  *	Send an ICMP message in response to a packet in error
440  */
441 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
442 		const struct in6_addr *force_saddr,
443 		const struct inet6_skb_parm *parm)
444 {
445 	struct inet6_dev *idev = NULL;
446 	struct ipv6hdr *hdr = ipv6_hdr(skb);
447 	struct sock *sk;
448 	struct net *net;
449 	struct ipv6_pinfo *np;
450 	const struct in6_addr *saddr = NULL;
451 	struct dst_entry *dst;
452 	struct icmp6hdr tmp_hdr;
453 	struct flowi6 fl6;
454 	struct icmpv6_msg msg;
455 	struct ipcm6_cookie ipc6;
456 	int iif = 0;
457 	int addr_type = 0;
458 	int len;
459 	u32 mark;
460 
461 	if ((u8 *)hdr < skb->head ||
462 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
463 		return;
464 
465 	if (!skb->dev)
466 		return;
467 	net = dev_net(skb->dev);
468 	mark = IP6_REPLY_MARK(net, skb->mark);
469 	/*
470 	 *	Make sure we respect the rules
471 	 *	i.e. RFC 1885 2.4(e)
472 	 *	Rule (e.1) is enforced by not using icmp6_send
473 	 *	in any code that processes icmp errors.
474 	 */
475 	addr_type = ipv6_addr_type(&hdr->daddr);
476 
477 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
478 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
479 		saddr = &hdr->daddr;
480 
481 	/*
482 	 *	Dest addr check
483 	 */
484 
485 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
486 		if (type != ICMPV6_PKT_TOOBIG &&
487 		    !(type == ICMPV6_PARAMPROB &&
488 		      code == ICMPV6_UNK_OPTION &&
489 		      (opt_unrec(skb, info))))
490 			return;
491 
492 		saddr = NULL;
493 	}
494 
495 	addr_type = ipv6_addr_type(&hdr->saddr);
496 
497 	/*
498 	 *	Source addr check
499 	 */
500 
501 	if (__ipv6_addr_needs_scope_id(addr_type)) {
502 		iif = icmp6_iif(skb);
503 	} else {
504 		/*
505 		 * The source device is used for looking up which routing table
506 		 * to use for sending an ICMP error.
507 		 */
508 		iif = l3mdev_master_ifindex(skb->dev);
509 	}
510 
511 	/*
512 	 *	Must not send error if the source does not uniquely
513 	 *	identify a single node (RFC2463 Section 2.4).
514 	 *	We check unspecified / multicast addresses here,
515 	 *	and anycast addresses will be checked later.
516 	 */
517 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
518 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
519 				    &hdr->saddr, &hdr->daddr);
520 		return;
521 	}
522 
523 	/*
524 	 *	Never answer to a ICMP packet.
525 	 */
526 	if (is_ineligible(skb)) {
527 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
528 				    &hdr->saddr, &hdr->daddr);
529 		return;
530 	}
531 
532 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
533 	local_bh_disable();
534 
535 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
536 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
537 		goto out_bh_enable;
538 
539 	mip6_addr_swap(skb, parm);
540 
541 	sk = icmpv6_xmit_lock(net);
542 	if (!sk)
543 		goto out_bh_enable;
544 
545 	memset(&fl6, 0, sizeof(fl6));
546 	fl6.flowi6_proto = IPPROTO_ICMPV6;
547 	fl6.daddr = hdr->saddr;
548 	if (force_saddr)
549 		saddr = force_saddr;
550 	if (saddr) {
551 		fl6.saddr = *saddr;
552 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
553 		/* select a more meaningful saddr from input if */
554 		struct net_device *in_netdev;
555 
556 		in_netdev = dev_get_by_index(net, parm->iif);
557 		if (in_netdev) {
558 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
559 					   inet6_sk(sk)->srcprefs,
560 					   &fl6.saddr);
561 			dev_put(in_netdev);
562 		}
563 	}
564 	fl6.flowi6_mark = mark;
565 	fl6.flowi6_oif = iif;
566 	fl6.fl6_icmp_type = type;
567 	fl6.fl6_icmp_code = code;
568 	fl6.flowi6_uid = sock_net_uid(net, NULL);
569 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
570 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
571 
572 	np = inet6_sk(sk);
573 
574 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
575 		goto out;
576 
577 	tmp_hdr.icmp6_type = type;
578 	tmp_hdr.icmp6_code = code;
579 	tmp_hdr.icmp6_cksum = 0;
580 	tmp_hdr.icmp6_pointer = htonl(info);
581 
582 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
583 		fl6.flowi6_oif = np->mcast_oif;
584 	else if (!fl6.flowi6_oif)
585 		fl6.flowi6_oif = np->ucast_oif;
586 
587 	ipcm6_init_sk(&ipc6, np);
588 	ipc6.sockc.mark = mark;
589 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
590 
591 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
592 	if (IS_ERR(dst))
593 		goto out;
594 
595 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
596 
597 	msg.skb = skb;
598 	msg.offset = skb_network_offset(skb);
599 	msg.type = type;
600 
601 	len = skb->len - msg.offset;
602 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
603 	if (len < 0) {
604 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
605 				    &hdr->saddr, &hdr->daddr);
606 		goto out_dst_release;
607 	}
608 
609 	rcu_read_lock();
610 	idev = __in6_dev_get(skb->dev);
611 
612 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
613 			    len + sizeof(struct icmp6hdr),
614 			    sizeof(struct icmp6hdr),
615 			    &ipc6, &fl6, (struct rt6_info *)dst,
616 			    MSG_DONTWAIT)) {
617 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
618 		ip6_flush_pending_frames(sk);
619 	} else {
620 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
621 					   len + sizeof(struct icmp6hdr));
622 	}
623 	rcu_read_unlock();
624 out_dst_release:
625 	dst_release(dst);
626 out:
627 	icmpv6_xmit_unlock(sk);
628 out_bh_enable:
629 	local_bh_enable();
630 }
631 EXPORT_SYMBOL(icmp6_send);
632 
633 /* Slightly more convenient version of icmp6_send with drop reasons.
634  */
635 void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
636 			      enum skb_drop_reason reason)
637 {
638 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
639 	kfree_skb_reason(skb, reason);
640 }
641 
642 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
643  * if sufficient data bytes are available
644  * @nhs is the size of the tunnel header(s) :
645  *  Either an IPv4 header for SIT encap
646  *         an IPv4 header + GRE header for GRE encap
647  */
648 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
649 			       unsigned int data_len)
650 {
651 	struct in6_addr temp_saddr;
652 	struct rt6_info *rt;
653 	struct sk_buff *skb2;
654 	u32 info = 0;
655 
656 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
657 		return 1;
658 
659 	/* RFC 4884 (partial) support for ICMP extensions */
660 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
661 		data_len = 0;
662 
663 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
664 
665 	if (!skb2)
666 		return 1;
667 
668 	skb_dst_drop(skb2);
669 	skb_pull(skb2, nhs);
670 	skb_reset_network_header(skb2);
671 
672 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
673 			skb, 0);
674 
675 	if (rt && rt->dst.dev)
676 		skb2->dev = rt->dst.dev;
677 
678 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
679 
680 	if (data_len) {
681 		/* RFC 4884 (partial) support :
682 		 * insert 0 padding at the end, before the extensions
683 		 */
684 		__skb_push(skb2, nhs);
685 		skb_reset_network_header(skb2);
686 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
687 		memset(skb2->data + data_len - nhs, 0, nhs);
688 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
689 		 * and stored in reserved[0]
690 		 */
691 		info = (data_len/8) << 24;
692 	}
693 	if (type == ICMP_TIME_EXCEEDED)
694 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
695 			   info, &temp_saddr, IP6CB(skb2));
696 	else
697 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
698 			   info, &temp_saddr, IP6CB(skb2));
699 	if (rt)
700 		ip6_rt_put(rt);
701 
702 	kfree_skb(skb2);
703 
704 	return 0;
705 }
706 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
707 
708 static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
709 {
710 	struct net *net = dev_net(skb->dev);
711 	struct sock *sk;
712 	struct inet6_dev *idev;
713 	struct ipv6_pinfo *np;
714 	const struct in6_addr *saddr = NULL;
715 	struct icmp6hdr *icmph = icmp6_hdr(skb);
716 	struct icmp6hdr tmp_hdr;
717 	struct flowi6 fl6;
718 	struct icmpv6_msg msg;
719 	struct dst_entry *dst;
720 	struct ipcm6_cookie ipc6;
721 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
722 	SKB_DR(reason);
723 	bool acast;
724 	u8 type;
725 
726 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
727 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
728 		return reason;
729 
730 	saddr = &ipv6_hdr(skb)->daddr;
731 
732 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
733 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
734 		return reason;
735 
736 	if (!ipv6_unicast_destination(skb) &&
737 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
738 		saddr = NULL;
739 
740 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
741 		type = ICMPV6_EXT_ECHO_REPLY;
742 	else
743 		type = ICMPV6_ECHO_REPLY;
744 
745 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
746 	tmp_hdr.icmp6_type = type;
747 
748 	memset(&fl6, 0, sizeof(fl6));
749 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
750 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
751 
752 	fl6.flowi6_proto = IPPROTO_ICMPV6;
753 	fl6.daddr = ipv6_hdr(skb)->saddr;
754 	if (saddr)
755 		fl6.saddr = *saddr;
756 	fl6.flowi6_oif = icmp6_iif(skb);
757 	fl6.fl6_icmp_type = type;
758 	fl6.flowi6_mark = mark;
759 	fl6.flowi6_uid = sock_net_uid(net, NULL);
760 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
761 
762 	local_bh_disable();
763 	sk = icmpv6_xmit_lock(net);
764 	if (!sk)
765 		goto out_bh_enable;
766 	np = inet6_sk(sk);
767 
768 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
769 		fl6.flowi6_oif = np->mcast_oif;
770 	else if (!fl6.flowi6_oif)
771 		fl6.flowi6_oif = np->ucast_oif;
772 
773 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
774 		goto out;
775 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
776 	if (IS_ERR(dst))
777 		goto out;
778 
779 	/* Check the ratelimit */
780 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
781 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
782 		goto out_dst_release;
783 
784 	idev = __in6_dev_get(skb->dev);
785 
786 	msg.skb = skb;
787 	msg.offset = 0;
788 	msg.type = type;
789 
790 	ipcm6_init_sk(&ipc6, np);
791 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
792 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
793 	ipc6.sockc.mark = mark;
794 
795 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
796 		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
797 			goto out_dst_release;
798 
799 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
800 			    skb->len + sizeof(struct icmp6hdr),
801 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
802 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
803 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
804 		ip6_flush_pending_frames(sk);
805 	} else {
806 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
807 					   skb->len + sizeof(struct icmp6hdr));
808 		reason = SKB_CONSUMED;
809 	}
810 out_dst_release:
811 	dst_release(dst);
812 out:
813 	icmpv6_xmit_unlock(sk);
814 out_bh_enable:
815 	local_bh_enable();
816 	return reason;
817 }
818 
819 enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
820 				   u8 code, __be32 info)
821 {
822 	struct inet6_skb_parm *opt = IP6CB(skb);
823 	struct net *net = dev_net(skb->dev);
824 	const struct inet6_protocol *ipprot;
825 	enum skb_drop_reason reason;
826 	int inner_offset;
827 	__be16 frag_off;
828 	u8 nexthdr;
829 
830 	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
831 	if (reason != SKB_NOT_DROPPED_YET)
832 		goto out;
833 
834 	seg6_icmp_srh(skb, opt);
835 
836 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
837 	if (ipv6_ext_hdr(nexthdr)) {
838 		/* now skip over extension headers */
839 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
840 						&nexthdr, &frag_off);
841 		if (inner_offset < 0) {
842 			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
843 			goto out;
844 		}
845 	} else {
846 		inner_offset = sizeof(struct ipv6hdr);
847 	}
848 
849 	/* Checkin header including 8 bytes of inner protocol header. */
850 	reason = pskb_may_pull_reason(skb, inner_offset + 8);
851 	if (reason != SKB_NOT_DROPPED_YET)
852 		goto out;
853 
854 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
855 	   Without this we will not able f.e. to make source routed
856 	   pmtu discovery.
857 	   Corresponding argument (opt) to notifiers is already added.
858 	   --ANK (980726)
859 	 */
860 
861 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
862 	if (ipprot && ipprot->err_handler)
863 		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
864 
865 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
866 	return SKB_CONSUMED;
867 
868 out:
869 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
870 	return reason;
871 }
872 
873 /*
874  *	Handle icmp messages
875  */
876 
877 static int icmpv6_rcv(struct sk_buff *skb)
878 {
879 	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
880 	struct net *net = dev_net(skb->dev);
881 	struct net_device *dev = icmp6_dev(skb);
882 	struct inet6_dev *idev = __in6_dev_get(dev);
883 	const struct in6_addr *saddr, *daddr;
884 	struct icmp6hdr *hdr;
885 	u8 type;
886 
887 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
888 		struct sec_path *sp = skb_sec_path(skb);
889 		int nh;
890 
891 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
892 				 XFRM_STATE_ICMP)) {
893 			reason = SKB_DROP_REASON_XFRM_POLICY;
894 			goto drop_no_count;
895 		}
896 
897 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
898 			goto drop_no_count;
899 
900 		nh = skb_network_offset(skb);
901 		skb_set_network_header(skb, sizeof(*hdr));
902 
903 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
904 						skb)) {
905 			reason = SKB_DROP_REASON_XFRM_POLICY;
906 			goto drop_no_count;
907 		}
908 
909 		skb_set_network_header(skb, nh);
910 	}
911 
912 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
913 
914 	saddr = &ipv6_hdr(skb)->saddr;
915 	daddr = &ipv6_hdr(skb)->daddr;
916 
917 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
918 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
919 				    saddr, daddr);
920 		goto csum_error;
921 	}
922 
923 	if (!pskb_pull(skb, sizeof(*hdr)))
924 		goto discard_it;
925 
926 	hdr = icmp6_hdr(skb);
927 
928 	type = hdr->icmp6_type;
929 
930 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
931 
932 	switch (type) {
933 	case ICMPV6_ECHO_REQUEST:
934 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
935 			reason = icmpv6_echo_reply(skb);
936 		break;
937 	case ICMPV6_EXT_ECHO_REQUEST:
938 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
939 		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
940 			reason = icmpv6_echo_reply(skb);
941 		break;
942 
943 	case ICMPV6_ECHO_REPLY:
944 		reason = ping_rcv(skb);
945 		break;
946 
947 	case ICMPV6_EXT_ECHO_REPLY:
948 		reason = ping_rcv(skb);
949 		break;
950 
951 	case ICMPV6_PKT_TOOBIG:
952 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
953 		   standard destination cache. Seems, only "advanced"
954 		   destination cache will allow to solve this problem
955 		   --ANK (980726)
956 		 */
957 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
958 			goto discard_it;
959 		hdr = icmp6_hdr(skb);
960 
961 		/* to notify */
962 		fallthrough;
963 	case ICMPV6_DEST_UNREACH:
964 	case ICMPV6_TIME_EXCEED:
965 	case ICMPV6_PARAMPROB:
966 		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
967 				       hdr->icmp6_mtu);
968 		break;
969 
970 	case NDISC_ROUTER_SOLICITATION:
971 	case NDISC_ROUTER_ADVERTISEMENT:
972 	case NDISC_NEIGHBOUR_SOLICITATION:
973 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
974 	case NDISC_REDIRECT:
975 		reason = ndisc_rcv(skb);
976 		break;
977 
978 	case ICMPV6_MGM_QUERY:
979 		igmp6_event_query(skb);
980 		return 0;
981 
982 	case ICMPV6_MGM_REPORT:
983 		igmp6_event_report(skb);
984 		return 0;
985 
986 	case ICMPV6_MGM_REDUCTION:
987 	case ICMPV6_NI_QUERY:
988 	case ICMPV6_NI_REPLY:
989 	case ICMPV6_MLD2_REPORT:
990 	case ICMPV6_DHAAD_REQUEST:
991 	case ICMPV6_DHAAD_REPLY:
992 	case ICMPV6_MOBILE_PREFIX_SOL:
993 	case ICMPV6_MOBILE_PREFIX_ADV:
994 		break;
995 
996 	default:
997 		/* informational */
998 		if (type & ICMPV6_INFOMSG_MASK)
999 			break;
1000 
1001 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
1002 				    saddr, daddr);
1003 
1004 		/*
1005 		 * error of unknown type.
1006 		 * must pass to upper level
1007 		 */
1008 
1009 		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
1010 				       hdr->icmp6_mtu);
1011 	}
1012 
1013 	/* until the v6 path can be better sorted assume failure and
1014 	 * preserve the status quo behaviour for the rest of the paths to here
1015 	 */
1016 	if (reason)
1017 		kfree_skb_reason(skb, reason);
1018 	else
1019 		consume_skb(skb);
1020 
1021 	return 0;
1022 
1023 csum_error:
1024 	reason = SKB_DROP_REASON_ICMP_CSUM;
1025 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1026 discard_it:
1027 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1028 drop_no_count:
1029 	kfree_skb_reason(skb, reason);
1030 	return 0;
1031 }
1032 
1033 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1034 		      u8 type,
1035 		      const struct in6_addr *saddr,
1036 		      const struct in6_addr *daddr,
1037 		      int oif)
1038 {
1039 	memset(fl6, 0, sizeof(*fl6));
1040 	fl6->saddr = *saddr;
1041 	fl6->daddr = *daddr;
1042 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1043 	fl6->fl6_icmp_type	= type;
1044 	fl6->fl6_icmp_code	= 0;
1045 	fl6->flowi6_oif		= oif;
1046 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1047 }
1048 
1049 int __init icmpv6_init(void)
1050 {
1051 	struct sock *sk;
1052 	int err, i;
1053 
1054 	for_each_possible_cpu(i) {
1055 		err = inet_ctl_sock_create(&sk, PF_INET6,
1056 					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1057 		if (err < 0) {
1058 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1059 			       err);
1060 			return err;
1061 		}
1062 
1063 		per_cpu(ipv6_icmp_sk, i) = sk;
1064 
1065 		/* Enough space for 2 64K ICMP packets, including
1066 		 * sk_buff struct overhead.
1067 		 */
1068 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1069 	}
1070 
1071 	err = -EAGAIN;
1072 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1073 		goto fail;
1074 
1075 	err = inet6_register_icmp_sender(icmp6_send);
1076 	if (err)
1077 		goto sender_reg_err;
1078 	return 0;
1079 
1080 sender_reg_err:
1081 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1082 fail:
1083 	pr_err("Failed to register ICMP6 protocol\n");
1084 	return err;
1085 }
1086 
1087 void icmpv6_cleanup(void)
1088 {
1089 	inet6_unregister_icmp_sender(icmp6_send);
1090 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1091 }
1092 
1093 
/*
 * errno / fatal flag for each Destination Unreachable code; the array is
 * indexed by the ICMPv6 code (see icmpv6_err_convert()).
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1127 
1128 int icmpv6_err_convert(u8 type, u8 code, int *err)
1129 {
1130 	int fatal = 0;
1131 
1132 	*err = EPROTO;
1133 
1134 	switch (type) {
1135 	case ICMPV6_DEST_UNREACH:
1136 		fatal = 1;
1137 		if (code < ARRAY_SIZE(tab_unreach)) {
1138 			*err  = tab_unreach[code].err;
1139 			fatal = tab_unreach[code].fatal;
1140 		}
1141 		break;
1142 
1143 	case ICMPV6_PKT_TOOBIG:
1144 		*err = EMSGSIZE;
1145 		break;
1146 
1147 	case ICMPV6_PARAMPROB:
1148 		*err = EPROTO;
1149 		fatal = 1;
1150 		break;
1151 
1152 	case ICMPV6_TIME_EXCEED:
1153 		*err = EHOSTUNREACH;
1154 		break;
1155 	}
1156 
1157 	return fatal;
1158 }
1159 EXPORT_SYMBOL(icmpv6_err_convert);
1160 
1161 #ifdef CONFIG_SYSCTL
1162 static struct ctl_table ipv6_icmp_table_template[] = {
1163 	{
1164 		.procname	= "ratelimit",
1165 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1166 		.maxlen		= sizeof(int),
1167 		.mode		= 0644,
1168 		.proc_handler	= proc_dointvec_ms_jiffies,
1169 	},
1170 	{
1171 		.procname	= "echo_ignore_all",
1172 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1173 		.maxlen		= sizeof(u8),
1174 		.mode		= 0644,
1175 		.proc_handler = proc_dou8vec_minmax,
1176 	},
1177 	{
1178 		.procname	= "echo_ignore_multicast",
1179 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1180 		.maxlen		= sizeof(u8),
1181 		.mode		= 0644,
1182 		.proc_handler = proc_dou8vec_minmax,
1183 	},
1184 	{
1185 		.procname	= "echo_ignore_anycast",
1186 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1187 		.maxlen		= sizeof(u8),
1188 		.mode		= 0644,
1189 		.proc_handler = proc_dou8vec_minmax,
1190 	},
1191 	{
1192 		.procname	= "ratemask",
1193 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1194 		.maxlen		= ICMPV6_MSG_MAX + 1,
1195 		.mode		= 0644,
1196 		.proc_handler = proc_do_large_bitmap,
1197 	},
1198 	{ },
1199 };
1200 
1201 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1202 {
1203 	struct ctl_table *table;
1204 
1205 	table = kmemdup(ipv6_icmp_table_template,
1206 			sizeof(ipv6_icmp_table_template),
1207 			GFP_KERNEL);
1208 
1209 	if (table) {
1210 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1211 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1212 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1213 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1214 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1215 	}
1216 	return table;
1217 }
1218 #endif
1219