xref: /openbmc/linux/net/ipv6/icmp.c (revision b384c95a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 		       u8 type, u8 code, int offset, __be32 info)
76 {
77 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 	struct net *net = dev_net(skb->dev);
80 
81 	if (type == ICMPV6_PKT_TOOBIG)
82 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 	else if (type == NDISC_REDIRECT)
84 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 			     sock_net_uid(net, NULL));
86 
87 	if (!(type & ICMPV6_INFOMSG_MASK))
88 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 			ping_err(skb, offset, ntohl(info));
90 
91 	return 0;
92 }
93 
94 static int icmpv6_rcv(struct sk_buff *skb);
95 
96 static const struct inet6_protocol icmpv6_protocol = {
97 	.handler	=	icmpv6_rcv,
98 	.err_handler	=	icmpv6_err,
99 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101 
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105 	struct sock *sk;
106 
107 	sk = this_cpu_read(ipv6_icmp_sk);
108 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 		/* This can happen if the output path (f.e. SIT or
110 		 * ip6ip6 tunnel) signals dst_link_failure() for an
111 		 * outgoing ICMP6 packet.
112 		 */
113 		return NULL;
114 	}
115 	sock_net_set(sk, net);
116 	return sk;
117 }
118 
119 static void icmpv6_xmit_unlock(struct sock *sk)
120 {
121 	sock_net_set(sk, &init_net);
122 	spin_unlock(&sk->sk_lock.slock);
123 }
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 	int len = skb->len - ptr;
140 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 	__be16 frag_off;
142 
143 	if (len < 0)
144 		return true;
145 
146 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 	if (ptr < 0)
148 		return false;
149 	if (nexthdr == IPPROTO_ICMPV6) {
150 		u8 _type, *tp;
151 		tp = skb_header_pointer(skb,
152 			ptr+offsetof(struct icmp6hdr, icmp6_type),
153 			sizeof(_type), &_type);
154 
155 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
156 		 * false if this is a fragment packet with no icmp header info.
157 		 */
158 		if (!tp && frag_off != 0)
159 			return false;
160 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 			return true;
162 	}
163 	return false;
164 }
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180 	if (icmpv6_mask_allow(net, type))
181 		return true;
182 
183 	if (icmp_global_allow())
184 		return true;
185 
186 	return false;
187 }
188 
189 /*
190  * Check the ICMP output rate limit
191  */
192 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
193 			       struct flowi6 *fl6)
194 {
195 	struct net *net = sock_net(sk);
196 	struct dst_entry *dst;
197 	bool res = false;
198 
199 	if (icmpv6_mask_allow(net, type))
200 		return true;
201 
202 	/*
203 	 * Look up the output route.
204 	 * XXX: perhaps the expire for routing entries cloned by
205 	 * this lookup should be more aggressive (not longer than timeout).
206 	 */
207 	dst = ip6_route_output(net, sk, fl6);
208 	if (dst->error) {
209 		IP6_INC_STATS(net, ip6_dst_idev(dst),
210 			      IPSTATS_MIB_OUTNOROUTES);
211 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
212 		res = true;
213 	} else {
214 		struct rt6_info *rt = (struct rt6_info *)dst;
215 		int tmo = net->ipv6.sysctl.icmpv6_time;
216 		struct inet_peer *peer;
217 
218 		/* Give more bandwidth to wider prefixes. */
219 		if (rt->rt6i_dst.plen < 128)
220 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
221 
222 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
223 		res = inet_peer_xrlim_allow(peer, tmo);
224 		if (peer)
225 			inet_putpeer(peer);
226 	}
227 	dst_release(dst);
228 	return res;
229 }
230 
231 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
232 				  struct flowi6 *fl6)
233 {
234 	struct net *net = sock_net(sk);
235 	struct dst_entry *dst;
236 	bool res = false;
237 
238 	dst = ip6_route_output(net, sk, fl6);
239 	if (!dst->error) {
240 		struct rt6_info *rt = (struct rt6_info *)dst;
241 		struct in6_addr prefsrc;
242 
243 		rt6_get_prefsrc(rt, &prefsrc);
244 		res = !ipv6_addr_any(&prefsrc);
245 	}
246 	dst_release(dst);
247 	return res;
248 }
249 
250 /*
251  *	an inline helper for the "simple" if statement below
252  *	checks if parameter problem report is caused by an
253  *	unrecognized IPv6 option that has the Option Type
254  *	highest-order two bits set to 10
255  */
256 
257 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
258 {
259 	u8 _optval, *op;
260 
261 	offset += skb_network_offset(skb);
262 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
263 	if (!op)
264 		return true;
265 	return (*op & 0xC0) == 0x80;
266 }
267 
268 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
269 				struct icmp6hdr *thdr, int len)
270 {
271 	struct sk_buff *skb;
272 	struct icmp6hdr *icmp6h;
273 
274 	skb = skb_peek(&sk->sk_write_queue);
275 	if (!skb)
276 		return;
277 
278 	icmp6h = icmp6_hdr(skb);
279 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
280 	icmp6h->icmp6_cksum = 0;
281 
282 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
283 		skb->csum = csum_partial(icmp6h,
284 					sizeof(struct icmp6hdr), skb->csum);
285 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
286 						      &fl6->daddr,
287 						      len, fl6->flowi6_proto,
288 						      skb->csum);
289 	} else {
290 		__wsum tmp_csum = 0;
291 
292 		skb_queue_walk(&sk->sk_write_queue, skb) {
293 			tmp_csum = csum_add(tmp_csum, skb->csum);
294 		}
295 
296 		tmp_csum = csum_partial(icmp6h,
297 					sizeof(struct icmp6hdr), tmp_csum);
298 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
299 						      &fl6->daddr,
300 						      len, fl6->flowi6_proto,
301 						      tmp_csum);
302 	}
303 	ip6_push_pending_frames(sk);
304 }
305 
306 struct icmpv6_msg {
307 	struct sk_buff	*skb;
308 	int		offset;
309 	uint8_t		type;
310 };
311 
312 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
313 {
314 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
315 	struct sk_buff *org_skb = msg->skb;
316 	__wsum csum;
317 
318 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
319 				      to, len);
320 	skb->csum = csum_block_add(skb->csum, csum, odd);
321 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
322 		nf_ct_attach(skb, org_skb);
323 	return 0;
324 }
325 
326 #if IS_ENABLED(CONFIG_IPV6_MIP6)
327 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
328 {
329 	struct ipv6hdr *iph = ipv6_hdr(skb);
330 	struct ipv6_destopt_hao *hao;
331 	struct in6_addr tmp;
332 	int off;
333 
334 	if (opt->dsthao) {
335 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
336 		if (likely(off >= 0)) {
337 			hao = (struct ipv6_destopt_hao *)
338 					(skb_network_header(skb) + off);
339 			tmp = iph->saddr;
340 			iph->saddr = hao->addr;
341 			hao->addr = tmp;
342 		}
343 	}
344 }
345 #else
346 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
347 #endif
348 
349 static struct dst_entry *icmpv6_route_lookup(struct net *net,
350 					     struct sk_buff *skb,
351 					     struct sock *sk,
352 					     struct flowi6 *fl6)
353 {
354 	struct dst_entry *dst, *dst2;
355 	struct flowi6 fl2;
356 	int err;
357 
358 	err = ip6_dst_lookup(net, sk, &dst, fl6);
359 	if (err)
360 		return ERR_PTR(err);
361 
362 	/*
363 	 * We won't send icmp if the destination is known
364 	 * anycast.
365 	 */
366 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
367 		net_dbg_ratelimited("icmp6_send: acast source\n");
368 		dst_release(dst);
369 		return ERR_PTR(-EINVAL);
370 	}
371 
372 	/* No need to clone since we're just using its address. */
373 	dst2 = dst;
374 
375 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
376 	if (!IS_ERR(dst)) {
377 		if (dst != dst2)
378 			return dst;
379 	} else {
380 		if (PTR_ERR(dst) == -EPERM)
381 			dst = NULL;
382 		else
383 			return dst;
384 	}
385 
386 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
387 	if (err)
388 		goto relookup_failed;
389 
390 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
391 	if (err)
392 		goto relookup_failed;
393 
394 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
395 	if (!IS_ERR(dst2)) {
396 		dst_release(dst);
397 		dst = dst2;
398 	} else {
399 		err = PTR_ERR(dst2);
400 		if (err == -EPERM) {
401 			dst_release(dst);
402 			return dst2;
403 		} else
404 			goto relookup_failed;
405 	}
406 
407 relookup_failed:
408 	if (dst)
409 		return dst;
410 	return ERR_PTR(err);
411 }
412 
413 static struct net_device *icmp6_dev(const struct sk_buff *skb)
414 {
415 	struct net_device *dev = skb->dev;
416 
417 	/* for local traffic to local address, skb dev is the loopback
418 	 * device. Check if there is a dst attached to the skb and if so
419 	 * get the real device index. Same is needed for replies to a link
420 	 * local address on a device enslaved to an L3 master device
421 	 */
422 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
423 		const struct rt6_info *rt6 = skb_rt6_info(skb);
424 
425 		if (rt6)
426 			dev = rt6->rt6i_idev->dev;
427 	}
428 
429 	return dev;
430 }
431 
432 static int icmp6_iif(const struct sk_buff *skb)
433 {
434 	return icmp6_dev(skb)->ifindex;
435 }
436 
437 /*
438  *	Send an ICMP message in response to a packet in error
439  */
440 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
441 		const struct in6_addr *force_saddr,
442 		const struct inet6_skb_parm *parm)
443 {
444 	struct inet6_dev *idev = NULL;
445 	struct ipv6hdr *hdr = ipv6_hdr(skb);
446 	struct sock *sk;
447 	struct net *net;
448 	struct ipv6_pinfo *np;
449 	const struct in6_addr *saddr = NULL;
450 	struct dst_entry *dst;
451 	struct icmp6hdr tmp_hdr;
452 	struct flowi6 fl6;
453 	struct icmpv6_msg msg;
454 	struct ipcm6_cookie ipc6;
455 	int iif = 0;
456 	int addr_type = 0;
457 	int len;
458 	u32 mark;
459 
460 	if ((u8 *)hdr < skb->head ||
461 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
462 		return;
463 
464 	if (!skb->dev)
465 		return;
466 	net = dev_net(skb->dev);
467 	mark = IP6_REPLY_MARK(net, skb->mark);
468 	/*
469 	 *	Make sure we respect the rules
470 	 *	i.e. RFC 1885 2.4(e)
471 	 *	Rule (e.1) is enforced by not using icmp6_send
472 	 *	in any code that processes icmp errors.
473 	 */
474 	addr_type = ipv6_addr_type(&hdr->daddr);
475 
476 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
477 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
478 		saddr = &hdr->daddr;
479 
480 	/*
481 	 *	Dest addr check
482 	 */
483 
484 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
485 		if (type != ICMPV6_PKT_TOOBIG &&
486 		    !(type == ICMPV6_PARAMPROB &&
487 		      code == ICMPV6_UNK_OPTION &&
488 		      (opt_unrec(skb, info))))
489 			return;
490 
491 		saddr = NULL;
492 	}
493 
494 	addr_type = ipv6_addr_type(&hdr->saddr);
495 
496 	/*
497 	 *	Source addr check
498 	 */
499 
500 	if (__ipv6_addr_needs_scope_id(addr_type)) {
501 		iif = icmp6_iif(skb);
502 	} else {
503 		/*
504 		 * The source device is used for looking up which routing table
505 		 * to use for sending an ICMP error.
506 		 */
507 		iif = l3mdev_master_ifindex(skb->dev);
508 	}
509 
510 	/*
511 	 *	Must not send error if the source does not uniquely
512 	 *	identify a single node (RFC2463 Section 2.4).
513 	 *	We check unspecified / multicast addresses here,
514 	 *	and anycast addresses will be checked later.
515 	 */
516 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
517 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
518 				    &hdr->saddr, &hdr->daddr);
519 		return;
520 	}
521 
522 	/*
523 	 *	Never answer to a ICMP packet.
524 	 */
525 	if (is_ineligible(skb)) {
526 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
527 				    &hdr->saddr, &hdr->daddr);
528 		return;
529 	}
530 
531 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
532 	local_bh_disable();
533 
534 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
535 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
536 		goto out_bh_enable;
537 
538 	mip6_addr_swap(skb, parm);
539 
540 	sk = icmpv6_xmit_lock(net);
541 	if (!sk)
542 		goto out_bh_enable;
543 
544 	memset(&fl6, 0, sizeof(fl6));
545 	fl6.flowi6_proto = IPPROTO_ICMPV6;
546 	fl6.daddr = hdr->saddr;
547 	if (force_saddr)
548 		saddr = force_saddr;
549 	if (saddr) {
550 		fl6.saddr = *saddr;
551 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
552 		/* select a more meaningful saddr from input if */
553 		struct net_device *in_netdev;
554 
555 		in_netdev = dev_get_by_index(net, parm->iif);
556 		if (in_netdev) {
557 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
558 					   inet6_sk(sk)->srcprefs,
559 					   &fl6.saddr);
560 			dev_put(in_netdev);
561 		}
562 	}
563 	fl6.flowi6_mark = mark;
564 	fl6.flowi6_oif = iif;
565 	fl6.fl6_icmp_type = type;
566 	fl6.fl6_icmp_code = code;
567 	fl6.flowi6_uid = sock_net_uid(net, NULL);
568 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
569 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
570 
571 	np = inet6_sk(sk);
572 
573 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
574 		goto out;
575 
576 	tmp_hdr.icmp6_type = type;
577 	tmp_hdr.icmp6_code = code;
578 	tmp_hdr.icmp6_cksum = 0;
579 	tmp_hdr.icmp6_pointer = htonl(info);
580 
581 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
582 		fl6.flowi6_oif = np->mcast_oif;
583 	else if (!fl6.flowi6_oif)
584 		fl6.flowi6_oif = np->ucast_oif;
585 
586 	ipcm6_init_sk(&ipc6, np);
587 	ipc6.sockc.mark = mark;
588 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
589 
590 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
591 	if (IS_ERR(dst))
592 		goto out;
593 
594 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
595 
596 	msg.skb = skb;
597 	msg.offset = skb_network_offset(skb);
598 	msg.type = type;
599 
600 	len = skb->len - msg.offset;
601 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
602 	if (len < 0) {
603 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
604 				    &hdr->saddr, &hdr->daddr);
605 		goto out_dst_release;
606 	}
607 
608 	rcu_read_lock();
609 	idev = __in6_dev_get(skb->dev);
610 
611 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
612 			    len + sizeof(struct icmp6hdr),
613 			    sizeof(struct icmp6hdr),
614 			    &ipc6, &fl6, (struct rt6_info *)dst,
615 			    MSG_DONTWAIT)) {
616 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
617 		ip6_flush_pending_frames(sk);
618 	} else {
619 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
620 					   len + sizeof(struct icmp6hdr));
621 	}
622 	rcu_read_unlock();
623 out_dst_release:
624 	dst_release(dst);
625 out:
626 	icmpv6_xmit_unlock(sk);
627 out_bh_enable:
628 	local_bh_enable();
629 }
630 EXPORT_SYMBOL(icmp6_send);
631 
632 /* Slightly more convenient version of icmp6_send.
633  */
634 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
635 {
636 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
637 	kfree_skb(skb);
638 }
639 
640 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
641  * if sufficient data bytes are available
642  * @nhs is the size of the tunnel header(s) :
643  *  Either an IPv4 header for SIT encap
644  *         an IPv4 header + GRE header for GRE encap
645  */
646 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
647 			       unsigned int data_len)
648 {
649 	struct in6_addr temp_saddr;
650 	struct rt6_info *rt;
651 	struct sk_buff *skb2;
652 	u32 info = 0;
653 
654 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
655 		return 1;
656 
657 	/* RFC 4884 (partial) support for ICMP extensions */
658 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
659 		data_len = 0;
660 
661 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
662 
663 	if (!skb2)
664 		return 1;
665 
666 	skb_dst_drop(skb2);
667 	skb_pull(skb2, nhs);
668 	skb_reset_network_header(skb2);
669 
670 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
671 			skb, 0);
672 
673 	if (rt && rt->dst.dev)
674 		skb2->dev = rt->dst.dev;
675 
676 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
677 
678 	if (data_len) {
679 		/* RFC 4884 (partial) support :
680 		 * insert 0 padding at the end, before the extensions
681 		 */
682 		__skb_push(skb2, nhs);
683 		skb_reset_network_header(skb2);
684 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
685 		memset(skb2->data + data_len - nhs, 0, nhs);
686 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
687 		 * and stored in reserved[0]
688 		 */
689 		info = (data_len/8) << 24;
690 	}
691 	if (type == ICMP_TIME_EXCEEDED)
692 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
693 			   info, &temp_saddr, IP6CB(skb2));
694 	else
695 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
696 			   info, &temp_saddr, IP6CB(skb2));
697 	if (rt)
698 		ip6_rt_put(rt);
699 
700 	kfree_skb(skb2);
701 
702 	return 0;
703 }
704 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
705 
706 static void icmpv6_echo_reply(struct sk_buff *skb)
707 {
708 	struct net *net = dev_net(skb->dev);
709 	struct sock *sk;
710 	struct inet6_dev *idev;
711 	struct ipv6_pinfo *np;
712 	const struct in6_addr *saddr = NULL;
713 	struct icmp6hdr *icmph = icmp6_hdr(skb);
714 	struct icmp6hdr tmp_hdr;
715 	struct flowi6 fl6;
716 	struct icmpv6_msg msg;
717 	struct dst_entry *dst;
718 	struct ipcm6_cookie ipc6;
719 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
720 	bool acast;
721 	u8 type;
722 
723 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
724 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
725 		return;
726 
727 	saddr = &ipv6_hdr(skb)->daddr;
728 
729 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
730 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
731 		return;
732 
733 	if (!ipv6_unicast_destination(skb) &&
734 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
735 		saddr = NULL;
736 
737 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
738 		type = ICMPV6_EXT_ECHO_REPLY;
739 	else
740 		type = ICMPV6_ECHO_REPLY;
741 
742 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
743 	tmp_hdr.icmp6_type = type;
744 
745 	memset(&fl6, 0, sizeof(fl6));
746 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
747 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
748 
749 	fl6.flowi6_proto = IPPROTO_ICMPV6;
750 	fl6.daddr = ipv6_hdr(skb)->saddr;
751 	if (saddr)
752 		fl6.saddr = *saddr;
753 	fl6.flowi6_oif = icmp6_iif(skb);
754 	fl6.fl6_icmp_type = type;
755 	fl6.flowi6_mark = mark;
756 	fl6.flowi6_uid = sock_net_uid(net, NULL);
757 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
758 
759 	local_bh_disable();
760 	sk = icmpv6_xmit_lock(net);
761 	if (!sk)
762 		goto out_bh_enable;
763 	np = inet6_sk(sk);
764 
765 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
766 		fl6.flowi6_oif = np->mcast_oif;
767 	else if (!fl6.flowi6_oif)
768 		fl6.flowi6_oif = np->ucast_oif;
769 
770 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
771 		goto out;
772 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
773 	if (IS_ERR(dst))
774 		goto out;
775 
776 	/* Check the ratelimit */
777 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
778 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
779 		goto out_dst_release;
780 
781 	idev = __in6_dev_get(skb->dev);
782 
783 	msg.skb = skb;
784 	msg.offset = 0;
785 	msg.type = type;
786 
787 	ipcm6_init_sk(&ipc6, np);
788 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
789 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
790 	ipc6.sockc.mark = mark;
791 
792 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
793 		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
794 			goto out_dst_release;
795 
796 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
797 			    skb->len + sizeof(struct icmp6hdr),
798 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
799 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
800 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
801 		ip6_flush_pending_frames(sk);
802 	} else {
803 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
804 					   skb->len + sizeof(struct icmp6hdr));
805 	}
806 out_dst_release:
807 	dst_release(dst);
808 out:
809 	icmpv6_xmit_unlock(sk);
810 out_bh_enable:
811 	local_bh_enable();
812 }
813 
814 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
815 {
816 	struct inet6_skb_parm *opt = IP6CB(skb);
817 	const struct inet6_protocol *ipprot;
818 	int inner_offset;
819 	__be16 frag_off;
820 	u8 nexthdr;
821 	struct net *net = dev_net(skb->dev);
822 
823 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
824 		goto out;
825 
826 	seg6_icmp_srh(skb, opt);
827 
828 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
829 	if (ipv6_ext_hdr(nexthdr)) {
830 		/* now skip over extension headers */
831 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
832 						&nexthdr, &frag_off);
833 		if (inner_offset < 0)
834 			goto out;
835 	} else {
836 		inner_offset = sizeof(struct ipv6hdr);
837 	}
838 
839 	/* Checkin header including 8 bytes of inner protocol header. */
840 	if (!pskb_may_pull(skb, inner_offset+8))
841 		goto out;
842 
843 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
844 	   Without this we will not able f.e. to make source routed
845 	   pmtu discovery.
846 	   Corresponding argument (opt) to notifiers is already added.
847 	   --ANK (980726)
848 	 */
849 
850 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
851 	if (ipprot && ipprot->err_handler)
852 		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
853 
854 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
855 	return;
856 
857 out:
858 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
859 }
860 
861 /*
862  *	Handle icmp messages
863  */
864 
865 static int icmpv6_rcv(struct sk_buff *skb)
866 {
867 	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
868 	struct net *net = dev_net(skb->dev);
869 	struct net_device *dev = icmp6_dev(skb);
870 	struct inet6_dev *idev = __in6_dev_get(dev);
871 	const struct in6_addr *saddr, *daddr;
872 	struct icmp6hdr *hdr;
873 	u8 type;
874 
875 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
876 		struct sec_path *sp = skb_sec_path(skb);
877 		int nh;
878 
879 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
880 				 XFRM_STATE_ICMP)) {
881 			reason = SKB_DROP_REASON_XFRM_POLICY;
882 			goto drop_no_count;
883 		}
884 
885 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
886 			goto drop_no_count;
887 
888 		nh = skb_network_offset(skb);
889 		skb_set_network_header(skb, sizeof(*hdr));
890 
891 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
892 						skb)) {
893 			reason = SKB_DROP_REASON_XFRM_POLICY;
894 			goto drop_no_count;
895 		}
896 
897 		skb_set_network_header(skb, nh);
898 	}
899 
900 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
901 
902 	saddr = &ipv6_hdr(skb)->saddr;
903 	daddr = &ipv6_hdr(skb)->daddr;
904 
905 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
906 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
907 				    saddr, daddr);
908 		goto csum_error;
909 	}
910 
911 	if (!pskb_pull(skb, sizeof(*hdr)))
912 		goto discard_it;
913 
914 	hdr = icmp6_hdr(skb);
915 
916 	type = hdr->icmp6_type;
917 
918 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
919 
920 	switch (type) {
921 	case ICMPV6_ECHO_REQUEST:
922 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
923 			icmpv6_echo_reply(skb);
924 		break;
925 	case ICMPV6_EXT_ECHO_REQUEST:
926 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
927 		    net->ipv4.sysctl_icmp_echo_enable_probe)
928 			icmpv6_echo_reply(skb);
929 		break;
930 
931 	case ICMPV6_ECHO_REPLY:
932 		reason = ping_rcv(skb);
933 		break;
934 
935 	case ICMPV6_EXT_ECHO_REPLY:
936 		reason = ping_rcv(skb);
937 		break;
938 
939 	case ICMPV6_PKT_TOOBIG:
940 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
941 		   standard destination cache. Seems, only "advanced"
942 		   destination cache will allow to solve this problem
943 		   --ANK (980726)
944 		 */
945 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
946 			goto discard_it;
947 		hdr = icmp6_hdr(skb);
948 
949 		/* to notify */
950 		fallthrough;
951 	case ICMPV6_DEST_UNREACH:
952 	case ICMPV6_TIME_EXCEED:
953 	case ICMPV6_PARAMPROB:
954 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
955 		break;
956 
957 	case NDISC_ROUTER_SOLICITATION:
958 	case NDISC_ROUTER_ADVERTISEMENT:
959 	case NDISC_NEIGHBOUR_SOLICITATION:
960 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
961 	case NDISC_REDIRECT:
962 		ndisc_rcv(skb);
963 		break;
964 
965 	case ICMPV6_MGM_QUERY:
966 		igmp6_event_query(skb);
967 		return 0;
968 
969 	case ICMPV6_MGM_REPORT:
970 		igmp6_event_report(skb);
971 		return 0;
972 
973 	case ICMPV6_MGM_REDUCTION:
974 	case ICMPV6_NI_QUERY:
975 	case ICMPV6_NI_REPLY:
976 	case ICMPV6_MLD2_REPORT:
977 	case ICMPV6_DHAAD_REQUEST:
978 	case ICMPV6_DHAAD_REPLY:
979 	case ICMPV6_MOBILE_PREFIX_SOL:
980 	case ICMPV6_MOBILE_PREFIX_ADV:
981 		break;
982 
983 	default:
984 		/* informational */
985 		if (type & ICMPV6_INFOMSG_MASK)
986 			break;
987 
988 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
989 				    saddr, daddr);
990 
991 		/*
992 		 * error of unknown type.
993 		 * must pass to upper level
994 		 */
995 
996 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
997 	}
998 
999 	/* until the v6 path can be better sorted assume failure and
1000 	 * preserve the status quo behaviour for the rest of the paths to here
1001 	 */
1002 	if (reason)
1003 		kfree_skb_reason(skb, reason);
1004 	else
1005 		consume_skb(skb);
1006 
1007 	return 0;
1008 
1009 csum_error:
1010 	reason = SKB_DROP_REASON_ICMP_CSUM;
1011 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1012 discard_it:
1013 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1014 drop_no_count:
1015 	kfree_skb_reason(skb, reason);
1016 	return 0;
1017 }
1018 
1019 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1020 		      u8 type,
1021 		      const struct in6_addr *saddr,
1022 		      const struct in6_addr *daddr,
1023 		      int oif)
1024 {
1025 	memset(fl6, 0, sizeof(*fl6));
1026 	fl6->saddr = *saddr;
1027 	fl6->daddr = *daddr;
1028 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1029 	fl6->fl6_icmp_type	= type;
1030 	fl6->fl6_icmp_code	= 0;
1031 	fl6->flowi6_oif		= oif;
1032 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1033 }
1034 
1035 int __init icmpv6_init(void)
1036 {
1037 	struct sock *sk;
1038 	int err, i;
1039 
1040 	for_each_possible_cpu(i) {
1041 		err = inet_ctl_sock_create(&sk, PF_INET6,
1042 					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1043 		if (err < 0) {
1044 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1045 			       err);
1046 			return err;
1047 		}
1048 
1049 		per_cpu(ipv6_icmp_sk, i) = sk;
1050 
1051 		/* Enough space for 2 64K ICMP packets, including
1052 		 * sk_buff struct overhead.
1053 		 */
1054 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1055 	}
1056 
1057 	err = -EAGAIN;
1058 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1059 		goto fail;
1060 
1061 	err = inet6_register_icmp_sender(icmp6_send);
1062 	if (err)
1063 		goto sender_reg_err;
1064 	return 0;
1065 
1066 sender_reg_err:
1067 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1068 fail:
1069 	pr_err("Failed to register ICMP6 protocol\n");
1070 	return err;
1071 }
1072 
1073 void icmpv6_cleanup(void)
1074 {
1075 	inet6_unregister_icmp_sender(icmp6_send);
1076 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1077 }
1078 
1079 
/*
 * Errno and "fatal" flag for each ICMPV6_DEST_UNREACH code, indexed by the
 * code value. Consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1113 
1114 int icmpv6_err_convert(u8 type, u8 code, int *err)
1115 {
1116 	int fatal = 0;
1117 
1118 	*err = EPROTO;
1119 
1120 	switch (type) {
1121 	case ICMPV6_DEST_UNREACH:
1122 		fatal = 1;
1123 		if (code < ARRAY_SIZE(tab_unreach)) {
1124 			*err  = tab_unreach[code].err;
1125 			fatal = tab_unreach[code].fatal;
1126 		}
1127 		break;
1128 
1129 	case ICMPV6_PKT_TOOBIG:
1130 		*err = EMSGSIZE;
1131 		break;
1132 
1133 	case ICMPV6_PARAMPROB:
1134 		*err = EPROTO;
1135 		fatal = 1;
1136 		break;
1137 
1138 	case ICMPV6_TIME_EXCEED:
1139 		*err = EHOSTUNREACH;
1140 		break;
1141 	}
1142 
1143 	return fatal;
1144 }
1145 EXPORT_SYMBOL(icmpv6_err_convert);
1146 
1147 #ifdef CONFIG_SYSCTL
1148 static struct ctl_table ipv6_icmp_table_template[] = {
1149 	{
1150 		.procname	= "ratelimit",
1151 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1152 		.maxlen		= sizeof(int),
1153 		.mode		= 0644,
1154 		.proc_handler	= proc_dointvec_ms_jiffies,
1155 	},
1156 	{
1157 		.procname	= "echo_ignore_all",
1158 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1159 		.maxlen		= sizeof(u8),
1160 		.mode		= 0644,
1161 		.proc_handler = proc_dou8vec_minmax,
1162 	},
1163 	{
1164 		.procname	= "echo_ignore_multicast",
1165 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1166 		.maxlen		= sizeof(u8),
1167 		.mode		= 0644,
1168 		.proc_handler = proc_dou8vec_minmax,
1169 	},
1170 	{
1171 		.procname	= "echo_ignore_anycast",
1172 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1173 		.maxlen		= sizeof(u8),
1174 		.mode		= 0644,
1175 		.proc_handler = proc_dou8vec_minmax,
1176 	},
1177 	{
1178 		.procname	= "ratemask",
1179 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1180 		.maxlen		= ICMPV6_MSG_MAX + 1,
1181 		.mode		= 0644,
1182 		.proc_handler = proc_do_large_bitmap,
1183 	},
1184 	{ },
1185 };
1186 
1187 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1188 {
1189 	struct ctl_table *table;
1190 
1191 	table = kmemdup(ipv6_icmp_table_template,
1192 			sizeof(ipv6_icmp_table_template),
1193 			GFP_KERNEL);
1194 
1195 	if (table) {
1196 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1197 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1198 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1199 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1200 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1201 	}
1202 	return table;
1203 }
1204 #endif
1205