xref: /openbmc/linux/net/ipv6/icmp.c (revision 1d27a0be)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/transp_v6.h>
61 #include <net/ip6_route.h>
62 #include <net/addrconf.h>
63 #include <net/icmp.h>
64 #include <net/xfrm.h>
65 #include <net/inet_common.h>
66 #include <net/dsfield.h>
67 #include <net/l3mdev.h>
68 
69 #include <linux/uaccess.h>
70 
71 /*
72  *	The ICMP socket(s). This is the most convenient way to flow control
73  *	our ICMP output as well as maintain a clean interface throughout
74  *	all layers. All Socketless IP sends will soon be gone.
75  *
76  *	On SMP we have one ICMP socket per-cpu.
77  */
78 static struct sock *icmpv6_sk(struct net *net)
79 {
80 	return this_cpu_read(*net->ipv6.icmp_sk);
81 }
82 
83 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
84 		       u8 type, u8 code, int offset, __be32 info)
85 {
86 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
87 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
88 	struct net *net = dev_net(skb->dev);
89 
90 	if (type == ICMPV6_PKT_TOOBIG)
91 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
92 	else if (type == NDISC_REDIRECT)
93 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
94 			     sock_net_uid(net, NULL));
95 
96 	if (!(type & ICMPV6_INFOMSG_MASK))
97 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
98 			ping_err(skb, offset, ntohl(info));
99 
100 	return 0;
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 /* Called with BH disabled */
112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113 {
114 	struct sock *sk;
115 
116 	sk = icmpv6_sk(net);
117 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118 		/* This can happen if the output path (f.e. SIT or
119 		 * ip6ip6 tunnel) signals dst_link_failure() for an
120 		 * outgoing ICMP6 packet.
121 		 */
122 		return NULL;
123 	}
124 	return sk;
125 }
126 
127 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
128 {
129 	spin_unlock(&sk->sk_lock.slock);
130 }
131 
132 /*
133  * Figure out, may we reply to this packet with icmp error.
134  *
135  * We do not reply, if:
136  *	- it was icmp error message.
137  *	- it is truncated, so that it is known, that protocol is ICMPV6
138  *	  (i.e. in the middle of some exthdr)
139  *
140  *	--ANK (980726)
141  */
142 
143 static bool is_ineligible(const struct sk_buff *skb)
144 {
145 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
146 	int len = skb->len - ptr;
147 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
148 	__be16 frag_off;
149 
150 	if (len < 0)
151 		return true;
152 
153 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
154 	if (ptr < 0)
155 		return false;
156 	if (nexthdr == IPPROTO_ICMPV6) {
157 		u8 _type, *tp;
158 		tp = skb_header_pointer(skb,
159 			ptr+offsetof(struct icmp6hdr, icmp6_type),
160 			sizeof(_type), &_type);
161 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
162 			return true;
163 	}
164 	return false;
165 }
166 
167 static bool icmpv6_mask_allow(struct net *net, int type)
168 {
169 	if (type > ICMPV6_MSG_MAX)
170 		return true;
171 
172 	/* Limit if icmp type is set in ratemask. */
173 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
174 		return true;
175 
176 	return false;
177 }
178 
179 static bool icmpv6_global_allow(struct net *net, int type)
180 {
181 	if (icmpv6_mask_allow(net, type))
182 		return true;
183 
184 	if (icmp_global_allow())
185 		return true;
186 
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	dst_release(dst);
229 	return res;
230 }
231 
232 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
233 				  struct flowi6 *fl6)
234 {
235 	struct net *net = sock_net(sk);
236 	struct dst_entry *dst;
237 	bool res = false;
238 
239 	dst = ip6_route_output(net, sk, fl6);
240 	if (!dst->error) {
241 		struct rt6_info *rt = (struct rt6_info *)dst;
242 		struct in6_addr prefsrc;
243 
244 		rt6_get_prefsrc(rt, &prefsrc);
245 		res = !ipv6_addr_any(&prefsrc);
246 	}
247 	dst_release(dst);
248 	return res;
249 }
250 
251 /*
252  *	an inline helper for the "simple" if statement below
253  *	checks if parameter problem report is caused by an
254  *	unrecognized IPv6 option that has the Option Type
255  *	highest-order two bits set to 10
256  */
257 
258 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
259 {
260 	u8 _optval, *op;
261 
262 	offset += skb_network_offset(skb);
263 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
264 	if (!op)
265 		return true;
266 	return (*op & 0xC0) == 0x80;
267 }
268 
269 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
270 				struct icmp6hdr *thdr, int len)
271 {
272 	struct sk_buff *skb;
273 	struct icmp6hdr *icmp6h;
274 
275 	skb = skb_peek(&sk->sk_write_queue);
276 	if (!skb)
277 		return;
278 
279 	icmp6h = icmp6_hdr(skb);
280 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
281 	icmp6h->icmp6_cksum = 0;
282 
283 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
284 		skb->csum = csum_partial(icmp6h,
285 					sizeof(struct icmp6hdr), skb->csum);
286 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
287 						      &fl6->daddr,
288 						      len, fl6->flowi6_proto,
289 						      skb->csum);
290 	} else {
291 		__wsum tmp_csum = 0;
292 
293 		skb_queue_walk(&sk->sk_write_queue, skb) {
294 			tmp_csum = csum_add(tmp_csum, skb->csum);
295 		}
296 
297 		tmp_csum = csum_partial(icmp6h,
298 					sizeof(struct icmp6hdr), tmp_csum);
299 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
300 						      &fl6->daddr,
301 						      len, fl6->flowi6_proto,
302 						      tmp_csum);
303 	}
304 	ip6_push_pending_frames(sk);
305 }
306 
307 struct icmpv6_msg {
308 	struct sk_buff	*skb;
309 	int		offset;
310 	uint8_t		type;
311 };
312 
313 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
314 {
315 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
316 	struct sk_buff *org_skb = msg->skb;
317 	__wsum csum = 0;
318 
319 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
320 				      to, len, csum);
321 	skb->csum = csum_block_add(skb->csum, csum, odd);
322 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
323 		nf_ct_attach(skb, org_skb);
324 	return 0;
325 }
326 
327 #if IS_ENABLED(CONFIG_IPV6_MIP6)
328 static void mip6_addr_swap(struct sk_buff *skb)
329 {
330 	struct ipv6hdr *iph = ipv6_hdr(skb);
331 	struct inet6_skb_parm *opt = IP6CB(skb);
332 	struct ipv6_destopt_hao *hao;
333 	struct in6_addr tmp;
334 	int off;
335 
336 	if (opt->dsthao) {
337 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
338 		if (likely(off >= 0)) {
339 			hao = (struct ipv6_destopt_hao *)
340 					(skb_network_header(skb) + off);
341 			tmp = iph->saddr;
342 			iph->saddr = hao->addr;
343 			hao->addr = tmp;
344 		}
345 	}
346 }
347 #else
348 static inline void mip6_addr_swap(struct sk_buff *skb) {}
349 #endif
350 
351 static struct dst_entry *icmpv6_route_lookup(struct net *net,
352 					     struct sk_buff *skb,
353 					     struct sock *sk,
354 					     struct flowi6 *fl6)
355 {
356 	struct dst_entry *dst, *dst2;
357 	struct flowi6 fl2;
358 	int err;
359 
360 	err = ip6_dst_lookup(net, sk, &dst, fl6);
361 	if (err)
362 		return ERR_PTR(err);
363 
364 	/*
365 	 * We won't send icmp if the destination is known
366 	 * anycast.
367 	 */
368 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
369 		net_dbg_ratelimited("icmp6_send: acast source\n");
370 		dst_release(dst);
371 		return ERR_PTR(-EINVAL);
372 	}
373 
374 	/* No need to clone since we're just using its address. */
375 	dst2 = dst;
376 
377 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
378 	if (!IS_ERR(dst)) {
379 		if (dst != dst2)
380 			return dst;
381 	} else {
382 		if (PTR_ERR(dst) == -EPERM)
383 			dst = NULL;
384 		else
385 			return dst;
386 	}
387 
388 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
389 	if (err)
390 		goto relookup_failed;
391 
392 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
393 	if (err)
394 		goto relookup_failed;
395 
396 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
397 	if (!IS_ERR(dst2)) {
398 		dst_release(dst);
399 		dst = dst2;
400 	} else {
401 		err = PTR_ERR(dst2);
402 		if (err == -EPERM) {
403 			dst_release(dst);
404 			return dst2;
405 		} else
406 			goto relookup_failed;
407 	}
408 
409 relookup_failed:
410 	if (dst)
411 		return dst;
412 	return ERR_PTR(err);
413 }
414 
415 static struct net_device *icmp6_dev(const struct sk_buff *skb)
416 {
417 	struct net_device *dev = skb->dev;
418 
419 	/* for local traffic to local address, skb dev is the loopback
420 	 * device. Check if there is a dst attached to the skb and if so
421 	 * get the real device index. Same is needed for replies to a link
422 	 * local address on a device enslaved to an L3 master device
423 	 */
424 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
425 		const struct rt6_info *rt6 = skb_rt6_info(skb);
426 
427 		if (rt6)
428 			dev = rt6->rt6i_idev->dev;
429 	}
430 
431 	return dev;
432 }
433 
434 static int icmp6_iif(const struct sk_buff *skb)
435 {
436 	return icmp6_dev(skb)->ifindex;
437 }
438 
439 /*
440  *	Send an ICMP message in response to a packet in error
441  */
442 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
443 		       const struct in6_addr *force_saddr)
444 {
445 	struct inet6_dev *idev = NULL;
446 	struct ipv6hdr *hdr = ipv6_hdr(skb);
447 	struct sock *sk;
448 	struct net *net;
449 	struct ipv6_pinfo *np;
450 	const struct in6_addr *saddr = NULL;
451 	struct dst_entry *dst;
452 	struct icmp6hdr tmp_hdr;
453 	struct flowi6 fl6;
454 	struct icmpv6_msg msg;
455 	struct ipcm6_cookie ipc6;
456 	int iif = 0;
457 	int addr_type = 0;
458 	int len;
459 	u32 mark;
460 
461 	if ((u8 *)hdr < skb->head ||
462 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
463 		return;
464 
465 	if (!skb->dev)
466 		return;
467 	net = dev_net(skb->dev);
468 	mark = IP6_REPLY_MARK(net, skb->mark);
469 	/*
470 	 *	Make sure we respect the rules
471 	 *	i.e. RFC 1885 2.4(e)
472 	 *	Rule (e.1) is enforced by not using icmp6_send
473 	 *	in any code that processes icmp errors.
474 	 */
475 	addr_type = ipv6_addr_type(&hdr->daddr);
476 
477 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
478 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
479 		saddr = &hdr->daddr;
480 
481 	/*
482 	 *	Dest addr check
483 	 */
484 
485 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
486 		if (type != ICMPV6_PKT_TOOBIG &&
487 		    !(type == ICMPV6_PARAMPROB &&
488 		      code == ICMPV6_UNK_OPTION &&
489 		      (opt_unrec(skb, info))))
490 			return;
491 
492 		saddr = NULL;
493 	}
494 
495 	addr_type = ipv6_addr_type(&hdr->saddr);
496 
497 	/*
498 	 *	Source addr check
499 	 */
500 
501 	if (__ipv6_addr_needs_scope_id(addr_type)) {
502 		iif = icmp6_iif(skb);
503 	} else {
504 		dst = skb_dst(skb);
505 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
506 	}
507 
508 	/*
509 	 *	Must not send error if the source does not uniquely
510 	 *	identify a single node (RFC2463 Section 2.4).
511 	 *	We check unspecified / multicast addresses here,
512 	 *	and anycast addresses will be checked later.
513 	 */
514 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
515 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
516 				    &hdr->saddr, &hdr->daddr);
517 		return;
518 	}
519 
520 	/*
521 	 *	Never answer to a ICMP packet.
522 	 */
523 	if (is_ineligible(skb)) {
524 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
525 				    &hdr->saddr, &hdr->daddr);
526 		return;
527 	}
528 
529 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
530 	local_bh_disable();
531 
532 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
533 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
534 		goto out_bh_enable;
535 
536 	mip6_addr_swap(skb);
537 
538 	sk = icmpv6_xmit_lock(net);
539 	if (!sk)
540 		goto out_bh_enable;
541 
542 	memset(&fl6, 0, sizeof(fl6));
543 	fl6.flowi6_proto = IPPROTO_ICMPV6;
544 	fl6.daddr = hdr->saddr;
545 	if (force_saddr)
546 		saddr = force_saddr;
547 	if (saddr) {
548 		fl6.saddr = *saddr;
549 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
550 		/* select a more meaningful saddr from input if */
551 		struct net_device *in_netdev;
552 
553 		in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
554 		if (in_netdev) {
555 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
556 					   inet6_sk(sk)->srcprefs,
557 					   &fl6.saddr);
558 			dev_put(in_netdev);
559 		}
560 	}
561 	fl6.flowi6_mark = mark;
562 	fl6.flowi6_oif = iif;
563 	fl6.fl6_icmp_type = type;
564 	fl6.fl6_icmp_code = code;
565 	fl6.flowi6_uid = sock_net_uid(net, NULL);
566 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
567 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
568 
569 	np = inet6_sk(sk);
570 
571 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
572 		goto out;
573 
574 	tmp_hdr.icmp6_type = type;
575 	tmp_hdr.icmp6_code = code;
576 	tmp_hdr.icmp6_cksum = 0;
577 	tmp_hdr.icmp6_pointer = htonl(info);
578 
579 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
580 		fl6.flowi6_oif = np->mcast_oif;
581 	else if (!fl6.flowi6_oif)
582 		fl6.flowi6_oif = np->ucast_oif;
583 
584 	ipcm6_init_sk(&ipc6, np);
585 	ipc6.sockc.mark = mark;
586 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
587 
588 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
589 	if (IS_ERR(dst))
590 		goto out;
591 
592 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
593 
594 	msg.skb = skb;
595 	msg.offset = skb_network_offset(skb);
596 	msg.type = type;
597 
598 	len = skb->len - msg.offset;
599 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
600 	if (len < 0) {
601 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
602 				    &hdr->saddr, &hdr->daddr);
603 		goto out_dst_release;
604 	}
605 
606 	rcu_read_lock();
607 	idev = __in6_dev_get(skb->dev);
608 
609 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
610 			    len + sizeof(struct icmp6hdr),
611 			    sizeof(struct icmp6hdr),
612 			    &ipc6, &fl6, (struct rt6_info *)dst,
613 			    MSG_DONTWAIT)) {
614 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
615 		ip6_flush_pending_frames(sk);
616 	} else {
617 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
618 					   len + sizeof(struct icmp6hdr));
619 	}
620 	rcu_read_unlock();
621 out_dst_release:
622 	dst_release(dst);
623 out:
624 	icmpv6_xmit_unlock(sk);
625 out_bh_enable:
626 	local_bh_enable();
627 }
628 
629 /* Slightly more convenient version of icmp6_send.
630  */
631 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
632 {
633 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
634 	kfree_skb(skb);
635 }
636 
637 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
638  * if sufficient data bytes are available
639  * @nhs is the size of the tunnel header(s) :
640  *  Either an IPv4 header for SIT encap
641  *         an IPv4 header + GRE header for GRE encap
642  */
643 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
644 			       unsigned int data_len)
645 {
646 	struct in6_addr temp_saddr;
647 	struct rt6_info *rt;
648 	struct sk_buff *skb2;
649 	u32 info = 0;
650 
651 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
652 		return 1;
653 
654 	/* RFC 4884 (partial) support for ICMP extensions */
655 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
656 		data_len = 0;
657 
658 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
659 
660 	if (!skb2)
661 		return 1;
662 
663 	skb_dst_drop(skb2);
664 	skb_pull(skb2, nhs);
665 	skb_reset_network_header(skb2);
666 
667 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
668 			skb, 0);
669 
670 	if (rt && rt->dst.dev)
671 		skb2->dev = rt->dst.dev;
672 
673 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
674 
675 	if (data_len) {
676 		/* RFC 4884 (partial) support :
677 		 * insert 0 padding at the end, before the extensions
678 		 */
679 		__skb_push(skb2, nhs);
680 		skb_reset_network_header(skb2);
681 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
682 		memset(skb2->data + data_len - nhs, 0, nhs);
683 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
684 		 * and stored in reserved[0]
685 		 */
686 		info = (data_len/8) << 24;
687 	}
688 	if (type == ICMP_TIME_EXCEEDED)
689 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
690 			   info, &temp_saddr);
691 	else
692 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
693 			   info, &temp_saddr);
694 	if (rt)
695 		ip6_rt_put(rt);
696 
697 	kfree_skb(skb2);
698 
699 	return 0;
700 }
701 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
702 
703 static void icmpv6_echo_reply(struct sk_buff *skb)
704 {
705 	struct net *net = dev_net(skb->dev);
706 	struct sock *sk;
707 	struct inet6_dev *idev;
708 	struct ipv6_pinfo *np;
709 	const struct in6_addr *saddr = NULL;
710 	struct icmp6hdr *icmph = icmp6_hdr(skb);
711 	struct icmp6hdr tmp_hdr;
712 	struct flowi6 fl6;
713 	struct icmpv6_msg msg;
714 	struct dst_entry *dst;
715 	struct ipcm6_cookie ipc6;
716 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
717 	bool acast;
718 
719 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
720 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
721 		return;
722 
723 	saddr = &ipv6_hdr(skb)->daddr;
724 
725 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
726 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
727 		return;
728 
729 	if (!ipv6_unicast_destination(skb) &&
730 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
731 		saddr = NULL;
732 
733 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
734 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
735 
736 	memset(&fl6, 0, sizeof(fl6));
737 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
738 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
739 
740 	fl6.flowi6_proto = IPPROTO_ICMPV6;
741 	fl6.daddr = ipv6_hdr(skb)->saddr;
742 	if (saddr)
743 		fl6.saddr = *saddr;
744 	fl6.flowi6_oif = icmp6_iif(skb);
745 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
746 	fl6.flowi6_mark = mark;
747 	fl6.flowi6_uid = sock_net_uid(net, NULL);
748 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
749 
750 	local_bh_disable();
751 	sk = icmpv6_xmit_lock(net);
752 	if (!sk)
753 		goto out_bh_enable;
754 	np = inet6_sk(sk);
755 
756 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
757 		fl6.flowi6_oif = np->mcast_oif;
758 	else if (!fl6.flowi6_oif)
759 		fl6.flowi6_oif = np->ucast_oif;
760 
761 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
762 		goto out;
763 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
764 	if (IS_ERR(dst))
765 		goto out;
766 
767 	/* Check the ratelimit */
768 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
769 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
770 		goto out_dst_release;
771 
772 	idev = __in6_dev_get(skb->dev);
773 
774 	msg.skb = skb;
775 	msg.offset = 0;
776 	msg.type = ICMPV6_ECHO_REPLY;
777 
778 	ipcm6_init_sk(&ipc6, np);
779 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
780 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
781 	ipc6.sockc.mark = mark;
782 
783 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
784 			    skb->len + sizeof(struct icmp6hdr),
785 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
786 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
787 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
788 		ip6_flush_pending_frames(sk);
789 	} else {
790 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
791 					   skb->len + sizeof(struct icmp6hdr));
792 	}
793 out_dst_release:
794 	dst_release(dst);
795 out:
796 	icmpv6_xmit_unlock(sk);
797 out_bh_enable:
798 	local_bh_enable();
799 }
800 
801 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
802 {
803 	const struct inet6_protocol *ipprot;
804 	int inner_offset;
805 	__be16 frag_off;
806 	u8 nexthdr;
807 	struct net *net = dev_net(skb->dev);
808 
809 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
810 		goto out;
811 
812 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
813 	if (ipv6_ext_hdr(nexthdr)) {
814 		/* now skip over extension headers */
815 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
816 						&nexthdr, &frag_off);
817 		if (inner_offset < 0)
818 			goto out;
819 	} else {
820 		inner_offset = sizeof(struct ipv6hdr);
821 	}
822 
823 	/* Checkin header including 8 bytes of inner protocol header. */
824 	if (!pskb_may_pull(skb, inner_offset+8))
825 		goto out;
826 
827 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
828 	   Without this we will not able f.e. to make source routed
829 	   pmtu discovery.
830 	   Corresponding argument (opt) to notifiers is already added.
831 	   --ANK (980726)
832 	 */
833 
834 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
835 	if (ipprot && ipprot->err_handler)
836 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
837 
838 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
839 	return;
840 
841 out:
842 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
843 }
844 
845 /*
846  *	Handle icmp messages
847  */
848 
849 static int icmpv6_rcv(struct sk_buff *skb)
850 {
851 	struct net *net = dev_net(skb->dev);
852 	struct net_device *dev = icmp6_dev(skb);
853 	struct inet6_dev *idev = __in6_dev_get(dev);
854 	const struct in6_addr *saddr, *daddr;
855 	struct icmp6hdr *hdr;
856 	u8 type;
857 	bool success = false;
858 
859 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
860 		struct sec_path *sp = skb_sec_path(skb);
861 		int nh;
862 
863 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
864 				 XFRM_STATE_ICMP))
865 			goto drop_no_count;
866 
867 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
868 			goto drop_no_count;
869 
870 		nh = skb_network_offset(skb);
871 		skb_set_network_header(skb, sizeof(*hdr));
872 
873 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
874 			goto drop_no_count;
875 
876 		skb_set_network_header(skb, nh);
877 	}
878 
879 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
880 
881 	saddr = &ipv6_hdr(skb)->saddr;
882 	daddr = &ipv6_hdr(skb)->daddr;
883 
884 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
885 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
886 				    saddr, daddr);
887 		goto csum_error;
888 	}
889 
890 	if (!pskb_pull(skb, sizeof(*hdr)))
891 		goto discard_it;
892 
893 	hdr = icmp6_hdr(skb);
894 
895 	type = hdr->icmp6_type;
896 
897 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
898 
899 	switch (type) {
900 	case ICMPV6_ECHO_REQUEST:
901 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
902 			icmpv6_echo_reply(skb);
903 		break;
904 
905 	case ICMPV6_ECHO_REPLY:
906 		success = ping_rcv(skb);
907 		break;
908 
909 	case ICMPV6_PKT_TOOBIG:
910 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
911 		   standard destination cache. Seems, only "advanced"
912 		   destination cache will allow to solve this problem
913 		   --ANK (980726)
914 		 */
915 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
916 			goto discard_it;
917 		hdr = icmp6_hdr(skb);
918 
919 		/* to notify */
920 		fallthrough;
921 	case ICMPV6_DEST_UNREACH:
922 	case ICMPV6_TIME_EXCEED:
923 	case ICMPV6_PARAMPROB:
924 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
925 		break;
926 
927 	case NDISC_ROUTER_SOLICITATION:
928 	case NDISC_ROUTER_ADVERTISEMENT:
929 	case NDISC_NEIGHBOUR_SOLICITATION:
930 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
931 	case NDISC_REDIRECT:
932 		ndisc_rcv(skb);
933 		break;
934 
935 	case ICMPV6_MGM_QUERY:
936 		igmp6_event_query(skb);
937 		break;
938 
939 	case ICMPV6_MGM_REPORT:
940 		igmp6_event_report(skb);
941 		break;
942 
943 	case ICMPV6_MGM_REDUCTION:
944 	case ICMPV6_NI_QUERY:
945 	case ICMPV6_NI_REPLY:
946 	case ICMPV6_MLD2_REPORT:
947 	case ICMPV6_DHAAD_REQUEST:
948 	case ICMPV6_DHAAD_REPLY:
949 	case ICMPV6_MOBILE_PREFIX_SOL:
950 	case ICMPV6_MOBILE_PREFIX_ADV:
951 		break;
952 
953 	default:
954 		/* informational */
955 		if (type & ICMPV6_INFOMSG_MASK)
956 			break;
957 
958 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
959 				    saddr, daddr);
960 
961 		/*
962 		 * error of unknown type.
963 		 * must pass to upper level
964 		 */
965 
966 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
967 	}
968 
969 	/* until the v6 path can be better sorted assume failure and
970 	 * preserve the status quo behaviour for the rest of the paths to here
971 	 */
972 	if (success)
973 		consume_skb(skb);
974 	else
975 		kfree_skb(skb);
976 
977 	return 0;
978 
979 csum_error:
980 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
981 discard_it:
982 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
983 drop_no_count:
984 	kfree_skb(skb);
985 	return 0;
986 }
987 
988 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
989 		      u8 type,
990 		      const struct in6_addr *saddr,
991 		      const struct in6_addr *daddr,
992 		      int oif)
993 {
994 	memset(fl6, 0, sizeof(*fl6));
995 	fl6->saddr = *saddr;
996 	fl6->daddr = *daddr;
997 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
998 	fl6->fl6_icmp_type	= type;
999 	fl6->fl6_icmp_code	= 0;
1000 	fl6->flowi6_oif		= oif;
1001 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
1002 }
1003 
1004 static void __net_exit icmpv6_sk_exit(struct net *net)
1005 {
1006 	int i;
1007 
1008 	for_each_possible_cpu(i)
1009 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1010 	free_percpu(net->ipv6.icmp_sk);
1011 }
1012 
1013 static int __net_init icmpv6_sk_init(struct net *net)
1014 {
1015 	struct sock *sk;
1016 	int err, i;
1017 
1018 	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
1019 	if (!net->ipv6.icmp_sk)
1020 		return -ENOMEM;
1021 
1022 	for_each_possible_cpu(i) {
1023 		err = inet_ctl_sock_create(&sk, PF_INET6,
1024 					   SOCK_RAW, IPPROTO_ICMPV6, net);
1025 		if (err < 0) {
1026 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1027 			       err);
1028 			goto fail;
1029 		}
1030 
1031 		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1032 
1033 		/* Enough space for 2 64K ICMP packets, including
1034 		 * sk_buff struct overhead.
1035 		 */
1036 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1037 	}
1038 	return 0;
1039 
1040  fail:
1041 	icmpv6_sk_exit(net);
1042 	return err;
1043 }
1044 
1045 static struct pernet_operations icmpv6_sk_ops = {
1046 	.init = icmpv6_sk_init,
1047 	.exit = icmpv6_sk_exit,
1048 };
1049 
1050 int __init icmpv6_init(void)
1051 {
1052 	int err;
1053 
1054 	err = register_pernet_subsys(&icmpv6_sk_ops);
1055 	if (err < 0)
1056 		return err;
1057 
1058 	err = -EAGAIN;
1059 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1060 		goto fail;
1061 
1062 	err = inet6_register_icmp_sender(icmp6_send);
1063 	if (err)
1064 		goto sender_reg_err;
1065 	return 0;
1066 
1067 sender_reg_err:
1068 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1069 fail:
1070 	pr_err("Failed to register ICMP6 protocol\n");
1071 	unregister_pernet_subsys(&icmpv6_sk_ops);
1072 	return err;
1073 }
1074 
1075 void icmpv6_cleanup(void)
1076 {
1077 	inet6_unregister_icmp_sender(icmp6_send);
1078 	unregister_pernet_subsys(&icmpv6_sk_ops);
1079 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1080 }
1081 
1082 
/*
 * Mapping from ICMPv6 Destination Unreachable codes (used as the array
 * index) to the errno value reported and whether the error is fatal.
 * Consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;	/* positive errno value */
	int fatal;	/* non-zero if the error is fatal */
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1116 
1117 int icmpv6_err_convert(u8 type, u8 code, int *err)
1118 {
1119 	int fatal = 0;
1120 
1121 	*err = EPROTO;
1122 
1123 	switch (type) {
1124 	case ICMPV6_DEST_UNREACH:
1125 		fatal = 1;
1126 		if (code < ARRAY_SIZE(tab_unreach)) {
1127 			*err  = tab_unreach[code].err;
1128 			fatal = tab_unreach[code].fatal;
1129 		}
1130 		break;
1131 
1132 	case ICMPV6_PKT_TOOBIG:
1133 		*err = EMSGSIZE;
1134 		break;
1135 
1136 	case ICMPV6_PARAMPROB:
1137 		*err = EPROTO;
1138 		fatal = 1;
1139 		break;
1140 
1141 	case ICMPV6_TIME_EXCEED:
1142 		*err = EHOSTUNREACH;
1143 		break;
1144 	}
1145 
1146 	return fatal;
1147 }
1148 EXPORT_SYMBOL(icmpv6_err_convert);
1149 
1150 #ifdef CONFIG_SYSCTL
1151 static struct ctl_table ipv6_icmp_table_template[] = {
1152 	{
1153 		.procname	= "ratelimit",
1154 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1155 		.maxlen		= sizeof(int),
1156 		.mode		= 0644,
1157 		.proc_handler	= proc_dointvec_ms_jiffies,
1158 	},
1159 	{
1160 		.procname	= "echo_ignore_all",
1161 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1162 		.maxlen		= sizeof(int),
1163 		.mode		= 0644,
1164 		.proc_handler = proc_dointvec,
1165 	},
1166 	{
1167 		.procname	= "echo_ignore_multicast",
1168 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1169 		.maxlen		= sizeof(int),
1170 		.mode		= 0644,
1171 		.proc_handler = proc_dointvec,
1172 	},
1173 	{
1174 		.procname	= "echo_ignore_anycast",
1175 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1176 		.maxlen		= sizeof(int),
1177 		.mode		= 0644,
1178 		.proc_handler = proc_dointvec,
1179 	},
1180 	{
1181 		.procname	= "ratemask",
1182 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1183 		.maxlen		= ICMPV6_MSG_MAX + 1,
1184 		.mode		= 0644,
1185 		.proc_handler = proc_do_large_bitmap,
1186 	},
1187 	{ },
1188 };
1189 
1190 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1191 {
1192 	struct ctl_table *table;
1193 
1194 	table = kmemdup(ipv6_icmp_table_template,
1195 			sizeof(ipv6_icmp_table_template),
1196 			GFP_KERNEL);
1197 
1198 	if (table) {
1199 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1200 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1201 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1202 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1203 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1204 	}
1205 	return table;
1206 }
1207 #endif
1208