xref: /openbmc/linux/net/ipv6/icmp.c (revision cce955ef)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
72 /*
73  *	The ICMP socket(s). This is the most convenient way to flow control
74  *	our ICMP output as well as maintain a clean interface throughout
75  *	all layers. All Socketless IP sends will soon be gone.
76  *
77  *	On SMP we have one ICMP socket per-cpu.
78  */
79 static struct sock *icmpv6_sk(struct net *net)
80 {
81 	return this_cpu_read(*net->ipv6.icmp_sk);
82 }
83 
84 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
85 		       u8 type, u8 code, int offset, __be32 info)
86 {
87 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
88 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
89 	struct net *net = dev_net(skb->dev);
90 
91 	if (type == ICMPV6_PKT_TOOBIG)
92 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
93 	else if (type == NDISC_REDIRECT)
94 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
95 			     sock_net_uid(net, NULL));
96 
97 	if (!(type & ICMPV6_INFOMSG_MASK))
98 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
99 			ping_err(skb, offset, ntohl(info));
100 
101 	return 0;
102 }
103 
104 static int icmpv6_rcv(struct sk_buff *skb);
105 
106 static const struct inet6_protocol icmpv6_protocol = {
107 	.handler	=	icmpv6_rcv,
108 	.err_handler	=	icmpv6_err,
109 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
110 };
111 
112 /* Called with BH disabled */
113 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
114 {
115 	struct sock *sk;
116 
117 	sk = icmpv6_sk(net);
118 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
119 		/* This can happen if the output path (f.e. SIT or
120 		 * ip6ip6 tunnel) signals dst_link_failure() for an
121 		 * outgoing ICMP6 packet.
122 		 */
123 		return NULL;
124 	}
125 	return sk;
126 }
127 
128 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
129 {
130 	spin_unlock(&sk->sk_lock.slock);
131 }
132 
133 /*
134  * Figure out, may we reply to this packet with icmp error.
135  *
136  * We do not reply, if:
137  *	- it was icmp error message.
138  *	- it is truncated, so that it is known, that protocol is ICMPV6
139  *	  (i.e. in the middle of some exthdr)
140  *
141  *	--ANK (980726)
142  */
143 
144 static bool is_ineligible(const struct sk_buff *skb)
145 {
146 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
147 	int len = skb->len - ptr;
148 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
149 	__be16 frag_off;
150 
151 	if (len < 0)
152 		return true;
153 
154 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
155 	if (ptr < 0)
156 		return false;
157 	if (nexthdr == IPPROTO_ICMPV6) {
158 		u8 _type, *tp;
159 		tp = skb_header_pointer(skb,
160 			ptr+offsetof(struct icmp6hdr, icmp6_type),
161 			sizeof(_type), &_type);
162 
163 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
164 		 * false if this is a fragment packet with no icmp header info.
165 		 */
166 		if (!tp && frag_off != 0)
167 			return false;
168 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
169 			return true;
170 	}
171 	return false;
172 }
173 
174 static bool icmpv6_mask_allow(struct net *net, int type)
175 {
176 	if (type > ICMPV6_MSG_MAX)
177 		return true;
178 
179 	/* Limit if icmp type is set in ratemask. */
180 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
181 		return true;
182 
183 	return false;
184 }
185 
186 static bool icmpv6_global_allow(struct net *net, int type)
187 {
188 	if (icmpv6_mask_allow(net, type))
189 		return true;
190 
191 	if (icmp_global_allow())
192 		return true;
193 
194 	return false;
195 }
196 
197 /*
198  * Check the ICMP output rate limit
199  */
200 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
201 			       struct flowi6 *fl6)
202 {
203 	struct net *net = sock_net(sk);
204 	struct dst_entry *dst;
205 	bool res = false;
206 
207 	if (icmpv6_mask_allow(net, type))
208 		return true;
209 
210 	/*
211 	 * Look up the output route.
212 	 * XXX: perhaps the expire for routing entries cloned by
213 	 * this lookup should be more aggressive (not longer than timeout).
214 	 */
215 	dst = ip6_route_output(net, sk, fl6);
216 	if (dst->error) {
217 		IP6_INC_STATS(net, ip6_dst_idev(dst),
218 			      IPSTATS_MIB_OUTNOROUTES);
219 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
220 		res = true;
221 	} else {
222 		struct rt6_info *rt = (struct rt6_info *)dst;
223 		int tmo = net->ipv6.sysctl.icmpv6_time;
224 		struct inet_peer *peer;
225 
226 		/* Give more bandwidth to wider prefixes. */
227 		if (rt->rt6i_dst.plen < 128)
228 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
229 
230 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
231 		res = inet_peer_xrlim_allow(peer, tmo);
232 		if (peer)
233 			inet_putpeer(peer);
234 	}
235 	dst_release(dst);
236 	return res;
237 }
238 
239 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
240 				  struct flowi6 *fl6)
241 {
242 	struct net *net = sock_net(sk);
243 	struct dst_entry *dst;
244 	bool res = false;
245 
246 	dst = ip6_route_output(net, sk, fl6);
247 	if (!dst->error) {
248 		struct rt6_info *rt = (struct rt6_info *)dst;
249 		struct in6_addr prefsrc;
250 
251 		rt6_get_prefsrc(rt, &prefsrc);
252 		res = !ipv6_addr_any(&prefsrc);
253 	}
254 	dst_release(dst);
255 	return res;
256 }
257 
258 /*
259  *	an inline helper for the "simple" if statement below
260  *	checks if parameter problem report is caused by an
261  *	unrecognized IPv6 option that has the Option Type
262  *	highest-order two bits set to 10
263  */
264 
265 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
266 {
267 	u8 _optval, *op;
268 
269 	offset += skb_network_offset(skb);
270 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
271 	if (!op)
272 		return true;
273 	return (*op & 0xC0) == 0x80;
274 }
275 
276 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
277 				struct icmp6hdr *thdr, int len)
278 {
279 	struct sk_buff *skb;
280 	struct icmp6hdr *icmp6h;
281 
282 	skb = skb_peek(&sk->sk_write_queue);
283 	if (!skb)
284 		return;
285 
286 	icmp6h = icmp6_hdr(skb);
287 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
288 	icmp6h->icmp6_cksum = 0;
289 
290 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
291 		skb->csum = csum_partial(icmp6h,
292 					sizeof(struct icmp6hdr), skb->csum);
293 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
294 						      &fl6->daddr,
295 						      len, fl6->flowi6_proto,
296 						      skb->csum);
297 	} else {
298 		__wsum tmp_csum = 0;
299 
300 		skb_queue_walk(&sk->sk_write_queue, skb) {
301 			tmp_csum = csum_add(tmp_csum, skb->csum);
302 		}
303 
304 		tmp_csum = csum_partial(icmp6h,
305 					sizeof(struct icmp6hdr), tmp_csum);
306 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
307 						      &fl6->daddr,
308 						      len, fl6->flowi6_proto,
309 						      tmp_csum);
310 	}
311 	ip6_push_pending_frames(sk);
312 }
313 
314 struct icmpv6_msg {
315 	struct sk_buff	*skb;
316 	int		offset;
317 	uint8_t		type;
318 };
319 
320 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
321 {
322 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
323 	struct sk_buff *org_skb = msg->skb;
324 	__wsum csum;
325 
326 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
327 				      to, len);
328 	skb->csum = csum_block_add(skb->csum, csum, odd);
329 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
330 		nf_ct_attach(skb, org_skb);
331 	return 0;
332 }
333 
334 #if IS_ENABLED(CONFIG_IPV6_MIP6)
335 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
336 {
337 	struct ipv6hdr *iph = ipv6_hdr(skb);
338 	struct ipv6_destopt_hao *hao;
339 	struct in6_addr tmp;
340 	int off;
341 
342 	if (opt->dsthao) {
343 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
344 		if (likely(off >= 0)) {
345 			hao = (struct ipv6_destopt_hao *)
346 					(skb_network_header(skb) + off);
347 			tmp = iph->saddr;
348 			iph->saddr = hao->addr;
349 			hao->addr = tmp;
350 		}
351 	}
352 }
353 #else
354 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
355 #endif
356 
357 static struct dst_entry *icmpv6_route_lookup(struct net *net,
358 					     struct sk_buff *skb,
359 					     struct sock *sk,
360 					     struct flowi6 *fl6)
361 {
362 	struct dst_entry *dst, *dst2;
363 	struct flowi6 fl2;
364 	int err;
365 
366 	err = ip6_dst_lookup(net, sk, &dst, fl6);
367 	if (err)
368 		return ERR_PTR(err);
369 
370 	/*
371 	 * We won't send icmp if the destination is known
372 	 * anycast.
373 	 */
374 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
375 		net_dbg_ratelimited("icmp6_send: acast source\n");
376 		dst_release(dst);
377 		return ERR_PTR(-EINVAL);
378 	}
379 
380 	/* No need to clone since we're just using its address. */
381 	dst2 = dst;
382 
383 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
384 	if (!IS_ERR(dst)) {
385 		if (dst != dst2)
386 			return dst;
387 	} else {
388 		if (PTR_ERR(dst) == -EPERM)
389 			dst = NULL;
390 		else
391 			return dst;
392 	}
393 
394 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
395 	if (err)
396 		goto relookup_failed;
397 
398 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
399 	if (err)
400 		goto relookup_failed;
401 
402 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
403 	if (!IS_ERR(dst2)) {
404 		dst_release(dst);
405 		dst = dst2;
406 	} else {
407 		err = PTR_ERR(dst2);
408 		if (err == -EPERM) {
409 			dst_release(dst);
410 			return dst2;
411 		} else
412 			goto relookup_failed;
413 	}
414 
415 relookup_failed:
416 	if (dst)
417 		return dst;
418 	return ERR_PTR(err);
419 }
420 
421 static struct net_device *icmp6_dev(const struct sk_buff *skb)
422 {
423 	struct net_device *dev = skb->dev;
424 
425 	/* for local traffic to local address, skb dev is the loopback
426 	 * device. Check if there is a dst attached to the skb and if so
427 	 * get the real device index. Same is needed for replies to a link
428 	 * local address on a device enslaved to an L3 master device
429 	 */
430 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
431 		const struct rt6_info *rt6 = skb_rt6_info(skb);
432 
433 		if (rt6)
434 			dev = rt6->rt6i_idev->dev;
435 	}
436 
437 	return dev;
438 }
439 
440 static int icmp6_iif(const struct sk_buff *skb)
441 {
442 	return icmp6_dev(skb)->ifindex;
443 }
444 
445 /*
446  *	Send an ICMP message in response to a packet in error
447  */
448 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
449 		const struct in6_addr *force_saddr,
450 		const struct inet6_skb_parm *parm)
451 {
452 	struct inet6_dev *idev = NULL;
453 	struct ipv6hdr *hdr = ipv6_hdr(skb);
454 	struct sock *sk;
455 	struct net *net;
456 	struct ipv6_pinfo *np;
457 	const struct in6_addr *saddr = NULL;
458 	struct dst_entry *dst;
459 	struct icmp6hdr tmp_hdr;
460 	struct flowi6 fl6;
461 	struct icmpv6_msg msg;
462 	struct ipcm6_cookie ipc6;
463 	int iif = 0;
464 	int addr_type = 0;
465 	int len;
466 	u32 mark;
467 
468 	if ((u8 *)hdr < skb->head ||
469 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
470 		return;
471 
472 	if (!skb->dev)
473 		return;
474 	net = dev_net(skb->dev);
475 	mark = IP6_REPLY_MARK(net, skb->mark);
476 	/*
477 	 *	Make sure we respect the rules
478 	 *	i.e. RFC 1885 2.4(e)
479 	 *	Rule (e.1) is enforced by not using icmp6_send
480 	 *	in any code that processes icmp errors.
481 	 */
482 	addr_type = ipv6_addr_type(&hdr->daddr);
483 
484 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
485 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
486 		saddr = &hdr->daddr;
487 
488 	/*
489 	 *	Dest addr check
490 	 */
491 
492 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
493 		if (type != ICMPV6_PKT_TOOBIG &&
494 		    !(type == ICMPV6_PARAMPROB &&
495 		      code == ICMPV6_UNK_OPTION &&
496 		      (opt_unrec(skb, info))))
497 			return;
498 
499 		saddr = NULL;
500 	}
501 
502 	addr_type = ipv6_addr_type(&hdr->saddr);
503 
504 	/*
505 	 *	Source addr check
506 	 */
507 
508 	if (__ipv6_addr_needs_scope_id(addr_type)) {
509 		iif = icmp6_iif(skb);
510 	} else {
511 		/*
512 		 * The source device is used for looking up which routing table
513 		 * to use for sending an ICMP error.
514 		 */
515 		iif = l3mdev_master_ifindex(skb->dev);
516 	}
517 
518 	/*
519 	 *	Must not send error if the source does not uniquely
520 	 *	identify a single node (RFC2463 Section 2.4).
521 	 *	We check unspecified / multicast addresses here,
522 	 *	and anycast addresses will be checked later.
523 	 */
524 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
525 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
526 				    &hdr->saddr, &hdr->daddr);
527 		return;
528 	}
529 
530 	/*
531 	 *	Never answer to a ICMP packet.
532 	 */
533 	if (is_ineligible(skb)) {
534 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
535 				    &hdr->saddr, &hdr->daddr);
536 		return;
537 	}
538 
539 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
540 	local_bh_disable();
541 
542 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
543 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
544 		goto out_bh_enable;
545 
546 	mip6_addr_swap(skb, parm);
547 
548 	sk = icmpv6_xmit_lock(net);
549 	if (!sk)
550 		goto out_bh_enable;
551 
552 	memset(&fl6, 0, sizeof(fl6));
553 	fl6.flowi6_proto = IPPROTO_ICMPV6;
554 	fl6.daddr = hdr->saddr;
555 	if (force_saddr)
556 		saddr = force_saddr;
557 	if (saddr) {
558 		fl6.saddr = *saddr;
559 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
560 		/* select a more meaningful saddr from input if */
561 		struct net_device *in_netdev;
562 
563 		in_netdev = dev_get_by_index(net, parm->iif);
564 		if (in_netdev) {
565 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
566 					   inet6_sk(sk)->srcprefs,
567 					   &fl6.saddr);
568 			dev_put(in_netdev);
569 		}
570 	}
571 	fl6.flowi6_mark = mark;
572 	fl6.flowi6_oif = iif;
573 	fl6.fl6_icmp_type = type;
574 	fl6.fl6_icmp_code = code;
575 	fl6.flowi6_uid = sock_net_uid(net, NULL);
576 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
577 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
578 
579 	np = inet6_sk(sk);
580 
581 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
582 		goto out;
583 
584 	tmp_hdr.icmp6_type = type;
585 	tmp_hdr.icmp6_code = code;
586 	tmp_hdr.icmp6_cksum = 0;
587 	tmp_hdr.icmp6_pointer = htonl(info);
588 
589 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
590 		fl6.flowi6_oif = np->mcast_oif;
591 	else if (!fl6.flowi6_oif)
592 		fl6.flowi6_oif = np->ucast_oif;
593 
594 	ipcm6_init_sk(&ipc6, np);
595 	ipc6.sockc.mark = mark;
596 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
597 
598 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
599 	if (IS_ERR(dst))
600 		goto out;
601 
602 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
603 
604 	msg.skb = skb;
605 	msg.offset = skb_network_offset(skb);
606 	msg.type = type;
607 
608 	len = skb->len - msg.offset;
609 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
610 	if (len < 0) {
611 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
612 				    &hdr->saddr, &hdr->daddr);
613 		goto out_dst_release;
614 	}
615 
616 	rcu_read_lock();
617 	idev = __in6_dev_get(skb->dev);
618 
619 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
620 			    len + sizeof(struct icmp6hdr),
621 			    sizeof(struct icmp6hdr),
622 			    &ipc6, &fl6, (struct rt6_info *)dst,
623 			    MSG_DONTWAIT)) {
624 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
625 		ip6_flush_pending_frames(sk);
626 	} else {
627 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
628 					   len + sizeof(struct icmp6hdr));
629 	}
630 	rcu_read_unlock();
631 out_dst_release:
632 	dst_release(dst);
633 out:
634 	icmpv6_xmit_unlock(sk);
635 out_bh_enable:
636 	local_bh_enable();
637 }
638 EXPORT_SYMBOL(icmp6_send);
639 
640 /* Slightly more convenient version of icmp6_send.
641  */
642 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
643 {
644 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
645 	kfree_skb(skb);
646 }
647 
648 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
649  * if sufficient data bytes are available
650  * @nhs is the size of the tunnel header(s) :
651  *  Either an IPv4 header for SIT encap
652  *         an IPv4 header + GRE header for GRE encap
653  */
654 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
655 			       unsigned int data_len)
656 {
657 	struct in6_addr temp_saddr;
658 	struct rt6_info *rt;
659 	struct sk_buff *skb2;
660 	u32 info = 0;
661 
662 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
663 		return 1;
664 
665 	/* RFC 4884 (partial) support for ICMP extensions */
666 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
667 		data_len = 0;
668 
669 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
670 
671 	if (!skb2)
672 		return 1;
673 
674 	skb_dst_drop(skb2);
675 	skb_pull(skb2, nhs);
676 	skb_reset_network_header(skb2);
677 
678 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
679 			skb, 0);
680 
681 	if (rt && rt->dst.dev)
682 		skb2->dev = rt->dst.dev;
683 
684 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
685 
686 	if (data_len) {
687 		/* RFC 4884 (partial) support :
688 		 * insert 0 padding at the end, before the extensions
689 		 */
690 		__skb_push(skb2, nhs);
691 		skb_reset_network_header(skb2);
692 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
693 		memset(skb2->data + data_len - nhs, 0, nhs);
694 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
695 		 * and stored in reserved[0]
696 		 */
697 		info = (data_len/8) << 24;
698 	}
699 	if (type == ICMP_TIME_EXCEEDED)
700 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
701 			   info, &temp_saddr, IP6CB(skb2));
702 	else
703 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
704 			   info, &temp_saddr, IP6CB(skb2));
705 	if (rt)
706 		ip6_rt_put(rt);
707 
708 	kfree_skb(skb2);
709 
710 	return 0;
711 }
712 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
713 
714 static void icmpv6_echo_reply(struct sk_buff *skb)
715 {
716 	struct net *net = dev_net(skb->dev);
717 	struct sock *sk;
718 	struct inet6_dev *idev;
719 	struct ipv6_pinfo *np;
720 	const struct in6_addr *saddr = NULL;
721 	struct icmp6hdr *icmph = icmp6_hdr(skb);
722 	struct icmp6hdr tmp_hdr;
723 	struct flowi6 fl6;
724 	struct icmpv6_msg msg;
725 	struct dst_entry *dst;
726 	struct ipcm6_cookie ipc6;
727 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
728 	bool acast;
729 	u8 type;
730 
731 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
732 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
733 		return;
734 
735 	saddr = &ipv6_hdr(skb)->daddr;
736 
737 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
738 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
739 		return;
740 
741 	if (!ipv6_unicast_destination(skb) &&
742 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
743 		saddr = NULL;
744 
745 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
746 		type = ICMPV6_EXT_ECHO_REPLY;
747 	else
748 		type = ICMPV6_ECHO_REPLY;
749 
750 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
751 	tmp_hdr.icmp6_type = type;
752 
753 	memset(&fl6, 0, sizeof(fl6));
754 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
755 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
756 
757 	fl6.flowi6_proto = IPPROTO_ICMPV6;
758 	fl6.daddr = ipv6_hdr(skb)->saddr;
759 	if (saddr)
760 		fl6.saddr = *saddr;
761 	fl6.flowi6_oif = icmp6_iif(skb);
762 	fl6.fl6_icmp_type = type;
763 	fl6.flowi6_mark = mark;
764 	fl6.flowi6_uid = sock_net_uid(net, NULL);
765 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
766 
767 	local_bh_disable();
768 	sk = icmpv6_xmit_lock(net);
769 	if (!sk)
770 		goto out_bh_enable;
771 	np = inet6_sk(sk);
772 
773 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
774 		fl6.flowi6_oif = np->mcast_oif;
775 	else if (!fl6.flowi6_oif)
776 		fl6.flowi6_oif = np->ucast_oif;
777 
778 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
779 		goto out;
780 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
781 	if (IS_ERR(dst))
782 		goto out;
783 
784 	/* Check the ratelimit */
785 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
786 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
787 		goto out_dst_release;
788 
789 	idev = __in6_dev_get(skb->dev);
790 
791 	msg.skb = skb;
792 	msg.offset = 0;
793 	msg.type = type;
794 
795 	ipcm6_init_sk(&ipc6, np);
796 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
797 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
798 	ipc6.sockc.mark = mark;
799 
800 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
801 		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
802 			goto out_dst_release;
803 
804 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
805 			    skb->len + sizeof(struct icmp6hdr),
806 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
807 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
808 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
809 		ip6_flush_pending_frames(sk);
810 	} else {
811 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
812 					   skb->len + sizeof(struct icmp6hdr));
813 	}
814 out_dst_release:
815 	dst_release(dst);
816 out:
817 	icmpv6_xmit_unlock(sk);
818 out_bh_enable:
819 	local_bh_enable();
820 }
821 
822 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
823 {
824 	struct inet6_skb_parm *opt = IP6CB(skb);
825 	const struct inet6_protocol *ipprot;
826 	int inner_offset;
827 	__be16 frag_off;
828 	u8 nexthdr;
829 	struct net *net = dev_net(skb->dev);
830 
831 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
832 		goto out;
833 
834 	seg6_icmp_srh(skb, opt);
835 
836 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
837 	if (ipv6_ext_hdr(nexthdr)) {
838 		/* now skip over extension headers */
839 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
840 						&nexthdr, &frag_off);
841 		if (inner_offset < 0)
842 			goto out;
843 	} else {
844 		inner_offset = sizeof(struct ipv6hdr);
845 	}
846 
847 	/* Checkin header including 8 bytes of inner protocol header. */
848 	if (!pskb_may_pull(skb, inner_offset+8))
849 		goto out;
850 
851 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
852 	   Without this we will not able f.e. to make source routed
853 	   pmtu discovery.
854 	   Corresponding argument (opt) to notifiers is already added.
855 	   --ANK (980726)
856 	 */
857 
858 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
859 	if (ipprot && ipprot->err_handler)
860 		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
861 
862 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
863 	return;
864 
865 out:
866 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
867 }
868 
869 /*
870  *	Handle icmp messages
871  */
872 
873 static int icmpv6_rcv(struct sk_buff *skb)
874 {
875 	struct net *net = dev_net(skb->dev);
876 	struct net_device *dev = icmp6_dev(skb);
877 	struct inet6_dev *idev = __in6_dev_get(dev);
878 	const struct in6_addr *saddr, *daddr;
879 	struct icmp6hdr *hdr;
880 	u8 type;
881 	bool success = false;
882 
883 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
884 		struct sec_path *sp = skb_sec_path(skb);
885 		int nh;
886 
887 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
888 				 XFRM_STATE_ICMP))
889 			goto drop_no_count;
890 
891 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
892 			goto drop_no_count;
893 
894 		nh = skb_network_offset(skb);
895 		skb_set_network_header(skb, sizeof(*hdr));
896 
897 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
898 			goto drop_no_count;
899 
900 		skb_set_network_header(skb, nh);
901 	}
902 
903 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
904 
905 	saddr = &ipv6_hdr(skb)->saddr;
906 	daddr = &ipv6_hdr(skb)->daddr;
907 
908 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
909 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
910 				    saddr, daddr);
911 		goto csum_error;
912 	}
913 
914 	if (!pskb_pull(skb, sizeof(*hdr)))
915 		goto discard_it;
916 
917 	hdr = icmp6_hdr(skb);
918 
919 	type = hdr->icmp6_type;
920 
921 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
922 
923 	switch (type) {
924 	case ICMPV6_ECHO_REQUEST:
925 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
926 			icmpv6_echo_reply(skb);
927 		break;
928 	case ICMPV6_EXT_ECHO_REQUEST:
929 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
930 		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
931 			icmpv6_echo_reply(skb);
932 		break;
933 
934 	case ICMPV6_ECHO_REPLY:
935 		success = ping_rcv(skb);
936 		break;
937 
938 	case ICMPV6_EXT_ECHO_REPLY:
939 		success = ping_rcv(skb);
940 		break;
941 
942 	case ICMPV6_PKT_TOOBIG:
943 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
944 		   standard destination cache. Seems, only "advanced"
945 		   destination cache will allow to solve this problem
946 		   --ANK (980726)
947 		 */
948 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
949 			goto discard_it;
950 		hdr = icmp6_hdr(skb);
951 
952 		/* to notify */
953 		fallthrough;
954 	case ICMPV6_DEST_UNREACH:
955 	case ICMPV6_TIME_EXCEED:
956 	case ICMPV6_PARAMPROB:
957 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
958 		break;
959 
960 	case NDISC_ROUTER_SOLICITATION:
961 	case NDISC_ROUTER_ADVERTISEMENT:
962 	case NDISC_NEIGHBOUR_SOLICITATION:
963 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
964 	case NDISC_REDIRECT:
965 		ndisc_rcv(skb);
966 		break;
967 
968 	case ICMPV6_MGM_QUERY:
969 		igmp6_event_query(skb);
970 		return 0;
971 
972 	case ICMPV6_MGM_REPORT:
973 		igmp6_event_report(skb);
974 		return 0;
975 
976 	case ICMPV6_MGM_REDUCTION:
977 	case ICMPV6_NI_QUERY:
978 	case ICMPV6_NI_REPLY:
979 	case ICMPV6_MLD2_REPORT:
980 	case ICMPV6_DHAAD_REQUEST:
981 	case ICMPV6_DHAAD_REPLY:
982 	case ICMPV6_MOBILE_PREFIX_SOL:
983 	case ICMPV6_MOBILE_PREFIX_ADV:
984 		break;
985 
986 	default:
987 		/* informational */
988 		if (type & ICMPV6_INFOMSG_MASK)
989 			break;
990 
991 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
992 				    saddr, daddr);
993 
994 		/*
995 		 * error of unknown type.
996 		 * must pass to upper level
997 		 */
998 
999 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
1000 	}
1001 
1002 	/* until the v6 path can be better sorted assume failure and
1003 	 * preserve the status quo behaviour for the rest of the paths to here
1004 	 */
1005 	if (success)
1006 		consume_skb(skb);
1007 	else
1008 		kfree_skb(skb);
1009 
1010 	return 0;
1011 
1012 csum_error:
1013 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1014 discard_it:
1015 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1016 drop_no_count:
1017 	kfree_skb(skb);
1018 	return 0;
1019 }
1020 
1021 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1022 		      u8 type,
1023 		      const struct in6_addr *saddr,
1024 		      const struct in6_addr *daddr,
1025 		      int oif)
1026 {
1027 	memset(fl6, 0, sizeof(*fl6));
1028 	fl6->saddr = *saddr;
1029 	fl6->daddr = *daddr;
1030 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1031 	fl6->fl6_icmp_type	= type;
1032 	fl6->fl6_icmp_code	= 0;
1033 	fl6->flowi6_oif		= oif;
1034 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1035 }
1036 
1037 static void __net_exit icmpv6_sk_exit(struct net *net)
1038 {
1039 	int i;
1040 
1041 	for_each_possible_cpu(i)
1042 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1043 	free_percpu(net->ipv6.icmp_sk);
1044 }
1045 
1046 static int __net_init icmpv6_sk_init(struct net *net)
1047 {
1048 	struct sock *sk;
1049 	int err, i;
1050 
1051 	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
1052 	if (!net->ipv6.icmp_sk)
1053 		return -ENOMEM;
1054 
1055 	for_each_possible_cpu(i) {
1056 		err = inet_ctl_sock_create(&sk, PF_INET6,
1057 					   SOCK_RAW, IPPROTO_ICMPV6, net);
1058 		if (err < 0) {
1059 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1060 			       err);
1061 			goto fail;
1062 		}
1063 
1064 		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1065 
1066 		/* Enough space for 2 64K ICMP packets, including
1067 		 * sk_buff struct overhead.
1068 		 */
1069 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1070 	}
1071 	return 0;
1072 
1073  fail:
1074 	icmpv6_sk_exit(net);
1075 	return err;
1076 }
1077 
1078 static struct pernet_operations icmpv6_sk_ops = {
1079 	.init = icmpv6_sk_init,
1080 	.exit = icmpv6_sk_exit,
1081 };
1082 
1083 int __init icmpv6_init(void)
1084 {
1085 	int err;
1086 
1087 	err = register_pernet_subsys(&icmpv6_sk_ops);
1088 	if (err < 0)
1089 		return err;
1090 
1091 	err = -EAGAIN;
1092 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1093 		goto fail;
1094 
1095 	err = inet6_register_icmp_sender(icmp6_send);
1096 	if (err)
1097 		goto sender_reg_err;
1098 	return 0;
1099 
1100 sender_reg_err:
1101 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1102 fail:
1103 	pr_err("Failed to register ICMP6 protocol\n");
1104 	unregister_pernet_subsys(&icmpv6_sk_ops);
1105 	return err;
1106 }
1107 
1108 void icmpv6_cleanup(void)
1109 {
1110 	inet6_unregister_icmp_sender(icmp6_send);
1111 	unregister_pernet_subsys(&icmpv6_sk_ops);
1112 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1113 }
1114 
1115 
/*
 * Translation of ICMPV6_DEST_UNREACH codes, indexed by code value, into an
 * errno (@err) and a flag telling whether the error is fatal (@fatal).
 * Used by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1149 
1150 int icmpv6_err_convert(u8 type, u8 code, int *err)
1151 {
1152 	int fatal = 0;
1153 
1154 	*err = EPROTO;
1155 
1156 	switch (type) {
1157 	case ICMPV6_DEST_UNREACH:
1158 		fatal = 1;
1159 		if (code < ARRAY_SIZE(tab_unreach)) {
1160 			*err  = tab_unreach[code].err;
1161 			fatal = tab_unreach[code].fatal;
1162 		}
1163 		break;
1164 
1165 	case ICMPV6_PKT_TOOBIG:
1166 		*err = EMSGSIZE;
1167 		break;
1168 
1169 	case ICMPV6_PARAMPROB:
1170 		*err = EPROTO;
1171 		fatal = 1;
1172 		break;
1173 
1174 	case ICMPV6_TIME_EXCEED:
1175 		*err = EHOSTUNREACH;
1176 		break;
1177 	}
1178 
1179 	return fatal;
1180 }
1181 EXPORT_SYMBOL(icmpv6_err_convert);
1182 
1183 #ifdef CONFIG_SYSCTL
1184 static struct ctl_table ipv6_icmp_table_template[] = {
1185 	{
1186 		.procname	= "ratelimit",
1187 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1188 		.maxlen		= sizeof(int),
1189 		.mode		= 0644,
1190 		.proc_handler	= proc_dointvec_ms_jiffies,
1191 	},
1192 	{
1193 		.procname	= "echo_ignore_all",
1194 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1195 		.maxlen		= sizeof(u8),
1196 		.mode		= 0644,
1197 		.proc_handler = proc_dou8vec_minmax,
1198 	},
1199 	{
1200 		.procname	= "echo_ignore_multicast",
1201 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1202 		.maxlen		= sizeof(u8),
1203 		.mode		= 0644,
1204 		.proc_handler = proc_dou8vec_minmax,
1205 	},
1206 	{
1207 		.procname	= "echo_ignore_anycast",
1208 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1209 		.maxlen		= sizeof(u8),
1210 		.mode		= 0644,
1211 		.proc_handler = proc_dou8vec_minmax,
1212 	},
1213 	{
1214 		.procname	= "ratemask",
1215 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1216 		.maxlen		= ICMPV6_MSG_MAX + 1,
1217 		.mode		= 0644,
1218 		.proc_handler = proc_do_large_bitmap,
1219 	},
1220 	{ },
1221 };
1222 
1223 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1224 {
1225 	struct ctl_table *table;
1226 
1227 	table = kmemdup(ipv6_icmp_table_template,
1228 			sizeof(ipv6_icmp_table_template),
1229 			GFP_KERNEL);
1230 
1231 	if (table) {
1232 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1233 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1234 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1235 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1236 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1237 	}
1238 	return table;
1239 }
1240 #endif
1241