xref: /openbmc/linux/net/ipv6/icmp.c (revision bc617613)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to send parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
/* Per-CPU ICMPv6 control socket: filled in by icmpv6_init() and handed out
 * by icmpv6_xmit_lock() as the socket used to transmit ICMPv6 messages.
 */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 		       u8 type, u8 code, int offset, __be32 info)
76 {
77 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 	struct net *net = dev_net(skb->dev);
80 
81 	if (type == ICMPV6_PKT_TOOBIG)
82 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 	else if (type == NDISC_REDIRECT)
84 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 			     sock_net_uid(net, NULL));
86 
87 	if (!(type & ICMPV6_INFOMSG_MASK))
88 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 			ping_err(skb, offset, ntohl(info));
90 
91 	return 0;
92 }
93 
94 static int icmpv6_rcv(struct sk_buff *skb);
95 
96 static const struct inet6_protocol icmpv6_protocol = {
97 	.handler	=	icmpv6_rcv,
98 	.err_handler	=	icmpv6_err,
99 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101 
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105 	struct sock *sk;
106 
107 	sk = this_cpu_read(ipv6_icmp_sk);
108 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 		/* This can happen if the output path (f.e. SIT or
110 		 * ip6ip6 tunnel) signals dst_link_failure() for an
111 		 * outgoing ICMP6 packet.
112 		 */
113 		return NULL;
114 	}
115 	sock_net_set(sk, net);
116 	return sk;
117 }
118 
119 static void icmpv6_xmit_unlock(struct sock *sk)
120 {
121 	sock_net_set(sk, &init_net);
122 	spin_unlock(&sk->sk_lock.slock);
123 }
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 	int len = skb->len - ptr;
140 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 	__be16 frag_off;
142 
143 	if (len < 0)
144 		return true;
145 
146 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 	if (ptr < 0)
148 		return false;
149 	if (nexthdr == IPPROTO_ICMPV6) {
150 		u8 _type, *tp;
151 		tp = skb_header_pointer(skb,
152 			ptr+offsetof(struct icmp6hdr, icmp6_type),
153 			sizeof(_type), &_type);
154 
155 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
156 		 * false if this is a fragment packet with no icmp header info.
157 		 */
158 		if (!tp && frag_off != 0)
159 			return false;
160 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 			return true;
162 	}
163 	return false;
164 }
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180 	if (icmpv6_mask_allow(net, type))
181 		return true;
182 
183 	if (icmp_global_allow())
184 		return true;
185 
186 	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	if (!res)
229 		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
230 				  ICMP6_MIB_RATELIMITHOST);
231 	dst_release(dst);
232 	return res;
233 }
234 
235 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
236 				  struct flowi6 *fl6)
237 {
238 	struct net *net = sock_net(sk);
239 	struct dst_entry *dst;
240 	bool res = false;
241 
242 	dst = ip6_route_output(net, sk, fl6);
243 	if (!dst->error) {
244 		struct rt6_info *rt = (struct rt6_info *)dst;
245 		struct in6_addr prefsrc;
246 
247 		rt6_get_prefsrc(rt, &prefsrc);
248 		res = !ipv6_addr_any(&prefsrc);
249 	}
250 	dst_release(dst);
251 	return res;
252 }
253 
254 /*
255  *	an inline helper for the "simple" if statement below
256  *	checks if parameter problem report is caused by an
257  *	unrecognized IPv6 option that has the Option Type
258  *	highest-order two bits set to 10
259  */
260 
261 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
262 {
263 	u8 _optval, *op;
264 
265 	offset += skb_network_offset(skb);
266 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
267 	if (!op)
268 		return true;
269 	return (*op & 0xC0) == 0x80;
270 }
271 
272 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
273 				struct icmp6hdr *thdr, int len)
274 {
275 	struct sk_buff *skb;
276 	struct icmp6hdr *icmp6h;
277 
278 	skb = skb_peek(&sk->sk_write_queue);
279 	if (!skb)
280 		return;
281 
282 	icmp6h = icmp6_hdr(skb);
283 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
284 	icmp6h->icmp6_cksum = 0;
285 
286 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
287 		skb->csum = csum_partial(icmp6h,
288 					sizeof(struct icmp6hdr), skb->csum);
289 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
290 						      &fl6->daddr,
291 						      len, fl6->flowi6_proto,
292 						      skb->csum);
293 	} else {
294 		__wsum tmp_csum = 0;
295 
296 		skb_queue_walk(&sk->sk_write_queue, skb) {
297 			tmp_csum = csum_add(tmp_csum, skb->csum);
298 		}
299 
300 		tmp_csum = csum_partial(icmp6h,
301 					sizeof(struct icmp6hdr), tmp_csum);
302 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
303 						      &fl6->daddr,
304 						      len, fl6->flowi6_proto,
305 						      tmp_csum);
306 	}
307 	ip6_push_pending_frames(sk);
308 }
309 
310 struct icmpv6_msg {
311 	struct sk_buff	*skb;
312 	int		offset;
313 	uint8_t		type;
314 };
315 
316 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
317 {
318 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
319 	struct sk_buff *org_skb = msg->skb;
320 	__wsum csum;
321 
322 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
323 				      to, len);
324 	skb->csum = csum_block_add(skb->csum, csum, odd);
325 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
326 		nf_ct_attach(skb, org_skb);
327 	return 0;
328 }
329 
330 #if IS_ENABLED(CONFIG_IPV6_MIP6)
331 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
332 {
333 	struct ipv6hdr *iph = ipv6_hdr(skb);
334 	struct ipv6_destopt_hao *hao;
335 	int off;
336 
337 	if (opt->dsthao) {
338 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
339 		if (likely(off >= 0)) {
340 			hao = (struct ipv6_destopt_hao *)
341 					(skb_network_header(skb) + off);
342 			swap(iph->saddr, hao->addr);
343 		}
344 	}
345 }
346 #else
347 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
348 #endif
349 
350 static struct dst_entry *icmpv6_route_lookup(struct net *net,
351 					     struct sk_buff *skb,
352 					     struct sock *sk,
353 					     struct flowi6 *fl6)
354 {
355 	struct dst_entry *dst, *dst2;
356 	struct flowi6 fl2;
357 	int err;
358 
359 	err = ip6_dst_lookup(net, sk, &dst, fl6);
360 	if (err)
361 		return ERR_PTR(err);
362 
363 	/*
364 	 * We won't send icmp if the destination is known
365 	 * anycast.
366 	 */
367 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
368 		net_dbg_ratelimited("icmp6_send: acast source\n");
369 		dst_release(dst);
370 		return ERR_PTR(-EINVAL);
371 	}
372 
373 	/* No need to clone since we're just using its address. */
374 	dst2 = dst;
375 
376 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
377 	if (!IS_ERR(dst)) {
378 		if (dst != dst2)
379 			return dst;
380 	} else {
381 		if (PTR_ERR(dst) == -EPERM)
382 			dst = NULL;
383 		else
384 			return dst;
385 	}
386 
387 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
388 	if (err)
389 		goto relookup_failed;
390 
391 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
392 	if (err)
393 		goto relookup_failed;
394 
395 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
396 	if (!IS_ERR(dst2)) {
397 		dst_release(dst);
398 		dst = dst2;
399 	} else {
400 		err = PTR_ERR(dst2);
401 		if (err == -EPERM) {
402 			dst_release(dst);
403 			return dst2;
404 		} else
405 			goto relookup_failed;
406 	}
407 
408 relookup_failed:
409 	if (dst)
410 		return dst;
411 	return ERR_PTR(err);
412 }
413 
414 static struct net_device *icmp6_dev(const struct sk_buff *skb)
415 {
416 	struct net_device *dev = skb->dev;
417 
418 	/* for local traffic to local address, skb dev is the loopback
419 	 * device. Check if there is a dst attached to the skb and if so
420 	 * get the real device index. Same is needed for replies to a link
421 	 * local address on a device enslaved to an L3 master device
422 	 */
423 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
424 		const struct rt6_info *rt6 = skb_rt6_info(skb);
425 
426 		if (rt6)
427 			dev = rt6->rt6i_idev->dev;
428 	}
429 
430 	return dev;
431 }
432 
433 static int icmp6_iif(const struct sk_buff *skb)
434 {
435 	return icmp6_dev(skb)->ifindex;
436 }
437 
438 /*
439  *	Send an ICMP message in response to a packet in error
440  */
441 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
442 		const struct in6_addr *force_saddr,
443 		const struct inet6_skb_parm *parm)
444 {
445 	struct inet6_dev *idev = NULL;
446 	struct ipv6hdr *hdr = ipv6_hdr(skb);
447 	struct sock *sk;
448 	struct net *net;
449 	struct ipv6_pinfo *np;
450 	const struct in6_addr *saddr = NULL;
451 	struct dst_entry *dst;
452 	struct icmp6hdr tmp_hdr;
453 	struct flowi6 fl6;
454 	struct icmpv6_msg msg;
455 	struct ipcm6_cookie ipc6;
456 	int iif = 0;
457 	int addr_type = 0;
458 	int len;
459 	u32 mark;
460 
461 	if ((u8 *)hdr < skb->head ||
462 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
463 		return;
464 
465 	if (!skb->dev)
466 		return;
467 	net = dev_net(skb->dev);
468 	mark = IP6_REPLY_MARK(net, skb->mark);
469 	/*
470 	 *	Make sure we respect the rules
471 	 *	i.e. RFC 1885 2.4(e)
472 	 *	Rule (e.1) is enforced by not using icmp6_send
473 	 *	in any code that processes icmp errors.
474 	 */
475 	addr_type = ipv6_addr_type(&hdr->daddr);
476 
477 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
478 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
479 		saddr = &hdr->daddr;
480 
481 	/*
482 	 *	Dest addr check
483 	 */
484 
485 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
486 		if (type != ICMPV6_PKT_TOOBIG &&
487 		    !(type == ICMPV6_PARAMPROB &&
488 		      code == ICMPV6_UNK_OPTION &&
489 		      (opt_unrec(skb, info))))
490 			return;
491 
492 		saddr = NULL;
493 	}
494 
495 	addr_type = ipv6_addr_type(&hdr->saddr);
496 
497 	/*
498 	 *	Source addr check
499 	 */
500 
501 	if (__ipv6_addr_needs_scope_id(addr_type)) {
502 		iif = icmp6_iif(skb);
503 	} else {
504 		/*
505 		 * The source device is used for looking up which routing table
506 		 * to use for sending an ICMP error.
507 		 */
508 		iif = l3mdev_master_ifindex(skb->dev);
509 	}
510 
511 	/*
512 	 *	Must not send error if the source does not uniquely
513 	 *	identify a single node (RFC2463 Section 2.4).
514 	 *	We check unspecified / multicast addresses here,
515 	 *	and anycast addresses will be checked later.
516 	 */
517 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
518 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
519 				    &hdr->saddr, &hdr->daddr);
520 		return;
521 	}
522 
523 	/*
524 	 *	Never answer to a ICMP packet.
525 	 */
526 	if (is_ineligible(skb)) {
527 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
528 				    &hdr->saddr, &hdr->daddr);
529 		return;
530 	}
531 
532 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
533 	local_bh_disable();
534 
535 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
536 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
537 		goto out_bh_enable;
538 
539 	mip6_addr_swap(skb, parm);
540 
541 	sk = icmpv6_xmit_lock(net);
542 	if (!sk)
543 		goto out_bh_enable;
544 
545 	memset(&fl6, 0, sizeof(fl6));
546 	fl6.flowi6_proto = IPPROTO_ICMPV6;
547 	fl6.daddr = hdr->saddr;
548 	if (force_saddr)
549 		saddr = force_saddr;
550 	if (saddr) {
551 		fl6.saddr = *saddr;
552 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
553 		/* select a more meaningful saddr from input if */
554 		struct net_device *in_netdev;
555 
556 		in_netdev = dev_get_by_index(net, parm->iif);
557 		if (in_netdev) {
558 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
559 					   inet6_sk(sk)->srcprefs,
560 					   &fl6.saddr);
561 			dev_put(in_netdev);
562 		}
563 	}
564 	fl6.flowi6_mark = mark;
565 	fl6.flowi6_oif = iif;
566 	fl6.fl6_icmp_type = type;
567 	fl6.fl6_icmp_code = code;
568 	fl6.flowi6_uid = sock_net_uid(net, NULL);
569 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
570 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
571 
572 	np = inet6_sk(sk);
573 
574 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
575 		goto out;
576 
577 	tmp_hdr.icmp6_type = type;
578 	tmp_hdr.icmp6_code = code;
579 	tmp_hdr.icmp6_cksum = 0;
580 	tmp_hdr.icmp6_pointer = htonl(info);
581 
582 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
583 		fl6.flowi6_oif = np->mcast_oif;
584 	else if (!fl6.flowi6_oif)
585 		fl6.flowi6_oif = np->ucast_oif;
586 
587 	ipcm6_init_sk(&ipc6, np);
588 	ipc6.sockc.mark = mark;
589 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
590 
591 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
592 	if (IS_ERR(dst))
593 		goto out;
594 
595 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
596 
597 	msg.skb = skb;
598 	msg.offset = skb_network_offset(skb);
599 	msg.type = type;
600 
601 	len = skb->len - msg.offset;
602 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
603 	if (len < 0) {
604 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
605 				    &hdr->saddr, &hdr->daddr);
606 		goto out_dst_release;
607 	}
608 
609 	rcu_read_lock();
610 	idev = __in6_dev_get(skb->dev);
611 
612 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
613 			    len + sizeof(struct icmp6hdr),
614 			    sizeof(struct icmp6hdr),
615 			    &ipc6, &fl6, (struct rt6_info *)dst,
616 			    MSG_DONTWAIT)) {
617 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
618 		ip6_flush_pending_frames(sk);
619 	} else {
620 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
621 					   len + sizeof(struct icmp6hdr));
622 	}
623 	rcu_read_unlock();
624 out_dst_release:
625 	dst_release(dst);
626 out:
627 	icmpv6_xmit_unlock(sk);
628 out_bh_enable:
629 	local_bh_enable();
630 }
631 EXPORT_SYMBOL(icmp6_send);
632 
633 /* Slightly more convenient version of icmp6_send with drop reasons.
634  */
635 void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
636 			      enum skb_drop_reason reason)
637 {
638 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
639 	kfree_skb_reason(skb, reason);
640 }
641 
642 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
643  * if sufficient data bytes are available
644  * @nhs is the size of the tunnel header(s) :
645  *  Either an IPv4 header for SIT encap
646  *         an IPv4 header + GRE header for GRE encap
647  */
648 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
649 			       unsigned int data_len)
650 {
651 	struct in6_addr temp_saddr;
652 	struct rt6_info *rt;
653 	struct sk_buff *skb2;
654 	u32 info = 0;
655 
656 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
657 		return 1;
658 
659 	/* RFC 4884 (partial) support for ICMP extensions */
660 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
661 		data_len = 0;
662 
663 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
664 
665 	if (!skb2)
666 		return 1;
667 
668 	skb_dst_drop(skb2);
669 	skb_pull(skb2, nhs);
670 	skb_reset_network_header(skb2);
671 
672 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
673 			skb, 0);
674 
675 	if (rt && rt->dst.dev)
676 		skb2->dev = rt->dst.dev;
677 
678 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
679 
680 	if (data_len) {
681 		/* RFC 4884 (partial) support :
682 		 * insert 0 padding at the end, before the extensions
683 		 */
684 		__skb_push(skb2, nhs);
685 		skb_reset_network_header(skb2);
686 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
687 		memset(skb2->data + data_len - nhs, 0, nhs);
688 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
689 		 * and stored in reserved[0]
690 		 */
691 		info = (data_len/8) << 24;
692 	}
693 	if (type == ICMP_TIME_EXCEEDED)
694 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
695 			   info, &temp_saddr, IP6CB(skb2));
696 	else
697 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
698 			   info, &temp_saddr, IP6CB(skb2));
699 	if (rt)
700 		ip6_rt_put(rt);
701 
702 	kfree_skb(skb2);
703 
704 	return 0;
705 }
706 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
707 
708 static void icmpv6_echo_reply(struct sk_buff *skb)
709 {
710 	struct net *net = dev_net(skb->dev);
711 	struct sock *sk;
712 	struct inet6_dev *idev;
713 	struct ipv6_pinfo *np;
714 	const struct in6_addr *saddr = NULL;
715 	struct icmp6hdr *icmph = icmp6_hdr(skb);
716 	struct icmp6hdr tmp_hdr;
717 	struct flowi6 fl6;
718 	struct icmpv6_msg msg;
719 	struct dst_entry *dst;
720 	struct ipcm6_cookie ipc6;
721 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
722 	bool acast;
723 	u8 type;
724 
725 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
726 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
727 		return;
728 
729 	saddr = &ipv6_hdr(skb)->daddr;
730 
731 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
732 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
733 		return;
734 
735 	if (!ipv6_unicast_destination(skb) &&
736 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
737 		saddr = NULL;
738 
739 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
740 		type = ICMPV6_EXT_ECHO_REPLY;
741 	else
742 		type = ICMPV6_ECHO_REPLY;
743 
744 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
745 	tmp_hdr.icmp6_type = type;
746 
747 	memset(&fl6, 0, sizeof(fl6));
748 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
749 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
750 
751 	fl6.flowi6_proto = IPPROTO_ICMPV6;
752 	fl6.daddr = ipv6_hdr(skb)->saddr;
753 	if (saddr)
754 		fl6.saddr = *saddr;
755 	fl6.flowi6_oif = icmp6_iif(skb);
756 	fl6.fl6_icmp_type = type;
757 	fl6.flowi6_mark = mark;
758 	fl6.flowi6_uid = sock_net_uid(net, NULL);
759 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
760 
761 	local_bh_disable();
762 	sk = icmpv6_xmit_lock(net);
763 	if (!sk)
764 		goto out_bh_enable;
765 	np = inet6_sk(sk);
766 
767 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
768 		fl6.flowi6_oif = np->mcast_oif;
769 	else if (!fl6.flowi6_oif)
770 		fl6.flowi6_oif = np->ucast_oif;
771 
772 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
773 		goto out;
774 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
775 	if (IS_ERR(dst))
776 		goto out;
777 
778 	/* Check the ratelimit */
779 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
780 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
781 		goto out_dst_release;
782 
783 	idev = __in6_dev_get(skb->dev);
784 
785 	msg.skb = skb;
786 	msg.offset = 0;
787 	msg.type = type;
788 
789 	ipcm6_init_sk(&ipc6, np);
790 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
791 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
792 	ipc6.sockc.mark = mark;
793 
794 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
795 		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
796 			goto out_dst_release;
797 
798 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
799 			    skb->len + sizeof(struct icmp6hdr),
800 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
801 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
802 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
803 		ip6_flush_pending_frames(sk);
804 	} else {
805 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
806 					   skb->len + sizeof(struct icmp6hdr));
807 	}
808 out_dst_release:
809 	dst_release(dst);
810 out:
811 	icmpv6_xmit_unlock(sk);
812 out_bh_enable:
813 	local_bh_enable();
814 }
815 
816 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
817 {
818 	struct inet6_skb_parm *opt = IP6CB(skb);
819 	const struct inet6_protocol *ipprot;
820 	int inner_offset;
821 	__be16 frag_off;
822 	u8 nexthdr;
823 	struct net *net = dev_net(skb->dev);
824 
825 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
826 		goto out;
827 
828 	seg6_icmp_srh(skb, opt);
829 
830 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
831 	if (ipv6_ext_hdr(nexthdr)) {
832 		/* now skip over extension headers */
833 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
834 						&nexthdr, &frag_off);
835 		if (inner_offset < 0)
836 			goto out;
837 	} else {
838 		inner_offset = sizeof(struct ipv6hdr);
839 	}
840 
841 	/* Checkin header including 8 bytes of inner protocol header. */
842 	if (!pskb_may_pull(skb, inner_offset+8))
843 		goto out;
844 
845 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
846 	   Without this we will not able f.e. to make source routed
847 	   pmtu discovery.
848 	   Corresponding argument (opt) to notifiers is already added.
849 	   --ANK (980726)
850 	 */
851 
852 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
853 	if (ipprot && ipprot->err_handler)
854 		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
855 
856 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
857 	return;
858 
859 out:
860 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
861 }
862 
863 /*
864  *	Handle icmp messages
865  */
866 
867 static int icmpv6_rcv(struct sk_buff *skb)
868 {
869 	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
870 	struct net *net = dev_net(skb->dev);
871 	struct net_device *dev = icmp6_dev(skb);
872 	struct inet6_dev *idev = __in6_dev_get(dev);
873 	const struct in6_addr *saddr, *daddr;
874 	struct icmp6hdr *hdr;
875 	u8 type;
876 
877 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
878 		struct sec_path *sp = skb_sec_path(skb);
879 		int nh;
880 
881 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
882 				 XFRM_STATE_ICMP)) {
883 			reason = SKB_DROP_REASON_XFRM_POLICY;
884 			goto drop_no_count;
885 		}
886 
887 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
888 			goto drop_no_count;
889 
890 		nh = skb_network_offset(skb);
891 		skb_set_network_header(skb, sizeof(*hdr));
892 
893 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
894 						skb)) {
895 			reason = SKB_DROP_REASON_XFRM_POLICY;
896 			goto drop_no_count;
897 		}
898 
899 		skb_set_network_header(skb, nh);
900 	}
901 
902 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
903 
904 	saddr = &ipv6_hdr(skb)->saddr;
905 	daddr = &ipv6_hdr(skb)->daddr;
906 
907 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
908 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
909 				    saddr, daddr);
910 		goto csum_error;
911 	}
912 
913 	if (!pskb_pull(skb, sizeof(*hdr)))
914 		goto discard_it;
915 
916 	hdr = icmp6_hdr(skb);
917 
918 	type = hdr->icmp6_type;
919 
920 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
921 
922 	switch (type) {
923 	case ICMPV6_ECHO_REQUEST:
924 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
925 			icmpv6_echo_reply(skb);
926 		break;
927 	case ICMPV6_EXT_ECHO_REQUEST:
928 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
929 		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
930 			icmpv6_echo_reply(skb);
931 		break;
932 
933 	case ICMPV6_ECHO_REPLY:
934 		reason = ping_rcv(skb);
935 		break;
936 
937 	case ICMPV6_EXT_ECHO_REPLY:
938 		reason = ping_rcv(skb);
939 		break;
940 
941 	case ICMPV6_PKT_TOOBIG:
942 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
943 		   standard destination cache. Seems, only "advanced"
944 		   destination cache will allow to solve this problem
945 		   --ANK (980726)
946 		 */
947 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
948 			goto discard_it;
949 		hdr = icmp6_hdr(skb);
950 
951 		/* to notify */
952 		fallthrough;
953 	case ICMPV6_DEST_UNREACH:
954 	case ICMPV6_TIME_EXCEED:
955 	case ICMPV6_PARAMPROB:
956 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
957 		break;
958 
959 	case NDISC_ROUTER_SOLICITATION:
960 	case NDISC_ROUTER_ADVERTISEMENT:
961 	case NDISC_NEIGHBOUR_SOLICITATION:
962 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
963 	case NDISC_REDIRECT:
964 		ndisc_rcv(skb);
965 		break;
966 
967 	case ICMPV6_MGM_QUERY:
968 		igmp6_event_query(skb);
969 		return 0;
970 
971 	case ICMPV6_MGM_REPORT:
972 		igmp6_event_report(skb);
973 		return 0;
974 
975 	case ICMPV6_MGM_REDUCTION:
976 	case ICMPV6_NI_QUERY:
977 	case ICMPV6_NI_REPLY:
978 	case ICMPV6_MLD2_REPORT:
979 	case ICMPV6_DHAAD_REQUEST:
980 	case ICMPV6_DHAAD_REPLY:
981 	case ICMPV6_MOBILE_PREFIX_SOL:
982 	case ICMPV6_MOBILE_PREFIX_ADV:
983 		break;
984 
985 	default:
986 		/* informational */
987 		if (type & ICMPV6_INFOMSG_MASK)
988 			break;
989 
990 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
991 				    saddr, daddr);
992 
993 		/*
994 		 * error of unknown type.
995 		 * must pass to upper level
996 		 */
997 
998 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
999 	}
1000 
1001 	/* until the v6 path can be better sorted assume failure and
1002 	 * preserve the status quo behaviour for the rest of the paths to here
1003 	 */
1004 	if (reason)
1005 		kfree_skb_reason(skb, reason);
1006 	else
1007 		consume_skb(skb);
1008 
1009 	return 0;
1010 
1011 csum_error:
1012 	reason = SKB_DROP_REASON_ICMP_CSUM;
1013 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1014 discard_it:
1015 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1016 drop_no_count:
1017 	kfree_skb_reason(skb, reason);
1018 	return 0;
1019 }
1020 
1021 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1022 		      u8 type,
1023 		      const struct in6_addr *saddr,
1024 		      const struct in6_addr *daddr,
1025 		      int oif)
1026 {
1027 	memset(fl6, 0, sizeof(*fl6));
1028 	fl6->saddr = *saddr;
1029 	fl6->daddr = *daddr;
1030 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1031 	fl6->fl6_icmp_type	= type;
1032 	fl6->fl6_icmp_code	= 0;
1033 	fl6->flowi6_oif		= oif;
1034 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1035 }
1036 
1037 int __init icmpv6_init(void)
1038 {
1039 	struct sock *sk;
1040 	int err, i;
1041 
1042 	for_each_possible_cpu(i) {
1043 		err = inet_ctl_sock_create(&sk, PF_INET6,
1044 					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1045 		if (err < 0) {
1046 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1047 			       err);
1048 			return err;
1049 		}
1050 
1051 		per_cpu(ipv6_icmp_sk, i) = sk;
1052 
1053 		/* Enough space for 2 64K ICMP packets, including
1054 		 * sk_buff struct overhead.
1055 		 */
1056 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1057 	}
1058 
1059 	err = -EAGAIN;
1060 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1061 		goto fail;
1062 
1063 	err = inet6_register_icmp_sender(icmp6_send);
1064 	if (err)
1065 		goto sender_reg_err;
1066 	return 0;
1067 
1068 sender_reg_err:
1069 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1070 fail:
1071 	pr_err("Failed to register ICMP6 protocol\n");
1072 	return err;
1073 }
1074 
/*
 * Undo the registrations made by icmpv6_init(): unregister icmp6_send() as
 * the ICMPv6 sender, then remove the ICMPv6 protocol handlers.
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1080 
1081 
/*
 * Destination Unreachable codes mapped to errno values, indexed by ICMPv6
 * code. @fatal is the value icmpv6_err_convert() returns for that code.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1115 
1116 int icmpv6_err_convert(u8 type, u8 code, int *err)
1117 {
1118 	int fatal = 0;
1119 
1120 	*err = EPROTO;
1121 
1122 	switch (type) {
1123 	case ICMPV6_DEST_UNREACH:
1124 		fatal = 1;
1125 		if (code < ARRAY_SIZE(tab_unreach)) {
1126 			*err  = tab_unreach[code].err;
1127 			fatal = tab_unreach[code].fatal;
1128 		}
1129 		break;
1130 
1131 	case ICMPV6_PKT_TOOBIG:
1132 		*err = EMSGSIZE;
1133 		break;
1134 
1135 	case ICMPV6_PARAMPROB:
1136 		*err = EPROTO;
1137 		fatal = 1;
1138 		break;
1139 
1140 	case ICMPV6_TIME_EXCEED:
1141 		*err = EHOSTUNREACH;
1142 		break;
1143 	}
1144 
1145 	return fatal;
1146 }
1147 EXPORT_SYMBOL(icmpv6_err_convert);
1148 
1149 #ifdef CONFIG_SYSCTL
1150 static struct ctl_table ipv6_icmp_table_template[] = {
1151 	{
1152 		.procname	= "ratelimit",
1153 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1154 		.maxlen		= sizeof(int),
1155 		.mode		= 0644,
1156 		.proc_handler	= proc_dointvec_ms_jiffies,
1157 	},
1158 	{
1159 		.procname	= "echo_ignore_all",
1160 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1161 		.maxlen		= sizeof(u8),
1162 		.mode		= 0644,
1163 		.proc_handler = proc_dou8vec_minmax,
1164 	},
1165 	{
1166 		.procname	= "echo_ignore_multicast",
1167 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1168 		.maxlen		= sizeof(u8),
1169 		.mode		= 0644,
1170 		.proc_handler = proc_dou8vec_minmax,
1171 	},
1172 	{
1173 		.procname	= "echo_ignore_anycast",
1174 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1175 		.maxlen		= sizeof(u8),
1176 		.mode		= 0644,
1177 		.proc_handler = proc_dou8vec_minmax,
1178 	},
1179 	{
1180 		.procname	= "ratemask",
1181 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1182 		.maxlen		= ICMPV6_MSG_MAX + 1,
1183 		.mode		= 0644,
1184 		.proc_handler = proc_do_large_bitmap,
1185 	},
1186 	{ },
1187 };
1188 
1189 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1190 {
1191 	struct ctl_table *table;
1192 
1193 	table = kmemdup(ipv6_icmp_table_template,
1194 			sizeof(ipv6_icmp_table_template),
1195 			GFP_KERNEL);
1196 
1197 	if (table) {
1198 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1199 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1200 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1201 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1202 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1203 	}
1204 	return table;
1205 }
1206 #endif
1207