// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to an ICMP
 *					error. add more length checks and other fixes.
 *	yoshfuji		:	ensure to send parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:	change output process to use ip6_append_data
 */


#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
#include <net/l3mdev.h>

#include <linux/uaccess.h>

/*
 *	The ICMP socket(s). This is the most convenient way to flow control
 *	our ICMP output as well as maintain a clean interface throughout
 *	all layers. All Socketless IP sends will soon be gone.
 *
 *	On SMP we have one ICMP socket per-cpu.
 */
static struct sock *icmpv6_sk(struct net *net)
{
	return this_cpu_read(*net->ipv6.icmp_sk);
}

static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}

static int icmpv6_rcv(struct sk_buff *skb);

static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/* Called with BH disabled */
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = icmpv6_sk(net);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (e.g. a SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	return sk;
}

static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock(&sk->sk_lock.slock);
}

/*
 * Figure out whether we may reply to this packet with an ICMP error.
 *
 * We do not reply if:
 *	- it was an ICMP error message.
 *	- it is truncated, so that it is known that the protocol is ICMPv6
 *	  (i.e. in the middle of some exthdr).
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;

		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}

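/* Message types not covered by the icmpv6_ratemask sysctl bitmap are
 * exempt from rate limiting: returning true here means "send without
 * consuming any rate-limit budget".
 */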
static bool icmpv6_mask_allow(struct net *net, int type)
{
	if (type > ICMPV6_MSG_MAX)
		return true;

	/* Limit if icmp type is set in ratemask. */
	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
		return true;

	return false;
}

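/* Host-wide rate limit, shared with IPv4: icmp_global_allow() consumes
 * tokens from the bucket controlled by the sysctl_icmp_msgs_per_sec
 * setting.
 */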
static bool icmpv6_global_allow(struct net *net, int type)
{
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow())
		return true;

	return false;
}

/*
 * Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	if (icmpv6_mask_allow(net, type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
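		/* The base timeout is halved for each full 32 bits by which
		 * the prefix length falls short of /128: e.g. a /64 route
		 * waits only tmo >> 2, a /32 route tmo >> 3.
		 */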
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	dst_release(dst);
	return res;
}

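/* Does the route to this destination carry a preferred source address
 * (RTA_PREFSRC)?  If not, icmp6_send() tries to pick a saddr from the
 * interface the offending packet arrived on.
 */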
static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
				  struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	dst = ip6_route_output(net, sk, fl6);
	if (!dst->error) {
		struct rt6_info *rt = (struct rt6_info *)dst;
		struct in6_addr prefsrc;

		rt6_get_prefsrc(rt, &prefsrc);
		res = !ipv6_addr_any(&prefsrc);
	}
	dst_release(dst);
	return res;
}

/*
 *	an inline helper for the "simple" if statement below;
 *	checks if a parameter problem report is caused by an
 *	unrecognized IPv6 option whose Option Type has its
 *	highest-order two bits set to 10
 */

static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
	u8 _optval, *op;

	offset += skb_network_offset(skb);
	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
	if (!op)
		return true;
	return (*op & 0xC0) == 0x80;
}

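/* Fill in the ICMPv6 header and checksum of a queued message and hand it
 * to ip6_push_pending_frames().  With a single skb on the write queue the
 * running skb->csum already covers the payload; with multiple fragments
 * the per-skb checksums are folded together first.
 */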
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}

struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
	uint8_t		type;
};

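/* getfrag callback for ip6_append_data(): copies a chunk of the offending
 * packet into the new skb while accumulating the checksum, and lets
 * connection tracking associate error messages with the original flow.
 */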
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
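/* Mobile IPv6: if the offending packet carried a Home Address destination
 * option, swap the care-of source address with the home address so that
 * the ICMP error quotes (and is routed back to) the home address.
 */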
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif

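/* Route the outgoing error.  If the forward XFRM lookup is denied with
 * -EPERM, retry with the flow of the offending packet decoded in reverse
 * and XFRM_LOOKUP_ICMP set, so that errors can still be sent for flows
 * matched by an IPsec policy (cf. RFC 4301 ICMP handling).
 */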
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send an ICMP error if the destination is a known
	 * anycast address.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}

static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device.
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		if (rt6)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}

static int icmp6_iif(const struct sk_buff *skb)
{
	return icmp6_dev(skb)->ifindex;
}

/*
 *	Send an ICMP message in response to a packet in error
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;
	net = dev_net(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules,
	 *	i.e. RFC 1885 2.4(e).
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to an ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
		goto out_bh_enable;

	mip6_addr_swap(skb);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from the input interface */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipcm6_init_sk(&ipc6, np);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

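	/* Quote only as much of the offending packet as fits, together
	 * with our IPv6 and ICMPv6 headers, in the minimum IPv6 MTU of
	 * 1280 bytes (RFC 4443 2.4(c)).
	 */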
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, (struct rt6_info *)dst,
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
EXPORT_SYMBOL(icmp6_send);

/* Slightly more convenient version of icmp6_send. */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}

/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available.
 * @nhs is the size of the tunnel header(s):
 *  either an IPv4 header for SIT encap,
 *  or an IPv4 header + GRE header for GRE encap.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr);
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr);
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

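/* Build and send an Echo Reply for a received Echo Request, honouring the
 * icmpv6_echo_ignore_multicast/anycast sysctls (icmpv6_echo_ignore_all is
 * checked by the caller) and the same rate limits as ICMPv6 errors.
 */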
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	bool acast;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return;

	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = ICMPV6_ECHO_REPLY;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}

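/* Deliver a received ICMPv6 error to the upper-layer protocol identified
 * by the inner packet's final next-header value, and to any matching raw
 * sockets.
 */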
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check the header, including 8 bytes of the inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not be able, e.g., to do source-routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}

/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

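	/* If the inbound IPsec policy check fails, the message may still be
	 * accepted when the last transform of the SA allows ICMP
	 * (XFRM_STATE_ICMP) and the reverse policy check on the inner
	 * headers passes.
	 */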
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems only "advanced"
		   destination cache will allow us to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted, assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}

void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
		      u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr,
		      int oif)
{
	memset(fl6, 0, sizeof(*fl6));
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}

static void __net_exit icmpv6_sk_exit(struct net *net)
{
	int i;

	for_each_possible_cpu(i)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
	free_percpu(net->ipv6.icmp_sk);
}

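/* Create one raw ICMPv6 control socket per possible CPU for this netns;
 * icmp6_send() picks the current CPU's socket with bottom halves disabled.
 */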
static int __net_init icmpv6_sk_init(struct net *net)
{
	struct sock *sk;
	int err, i;

	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
	if (!net->ipv6.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			goto fail;
		}

		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}
	return 0;

 fail:
	icmpv6_sk_exit(net);
	return err;
}

static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};

int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}

void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}

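/* errno mapping for ICMPV6_DEST_UNREACH, indexed by code (0-6). */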
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};

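/* Map an ICMPv6 error type/code to an errno value.  Returns non-zero when
 * the error is fatal for the socket (e.g. port unreachable).
 */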
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err  = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{ },
};

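/* Clone the template for a netns and point each .data at that netns's own
 * sysctl storage; entries must stay in template order.
 */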
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
	}
	return table;
}
#endif