xref: /openbmc/linux/net/ipv6/icmp.c (revision 61f4d204)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69 
70 #include <linux/uaccess.h>
71 
/*
 * Per-CPU control socket used to transmit ICMPv6 messages. icmpv6_init()
 * creates one socket per possible CPU; icmpv6_xmit_lock() hands out the
 * current CPU's socket.
 */
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73 
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 		       u8 type, u8 code, int offset, __be32 info)
76 {
77 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 	struct net *net = dev_net(skb->dev);
80 
81 	if (type == ICMPV6_PKT_TOOBIG)
82 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 	else if (type == NDISC_REDIRECT)
84 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 			     sock_net_uid(net, NULL));
86 
87 	if (!(type & ICMPV6_INFOMSG_MASK))
88 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 			ping_err(skb, offset, ntohl(info));
90 
91 	return 0;
92 }
93 
94 static int icmpv6_rcv(struct sk_buff *skb);
95 
96 static const struct inet6_protocol icmpv6_protocol = {
97 	.handler	=	icmpv6_rcv,
98 	.err_handler	=	icmpv6_err,
99 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101 
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105 	struct sock *sk;
106 
107 	sk = this_cpu_read(ipv6_icmp_sk);
108 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 		/* This can happen if the output path (f.e. SIT or
110 		 * ip6ip6 tunnel) signals dst_link_failure() for an
111 		 * outgoing ICMP6 packet.
112 		 */
113 		return NULL;
114 	}
115 	sock_net_set(sk, net);
116 	return sk;
117 }
118 
/*
 * Release a socket obtained from icmpv6_xmit_lock(): its netns is pointed
 * back at init_net before the socket spinlock is dropped.
 */
119 static void icmpv6_xmit_unlock(struct sock *sk)
120 {
121 	sock_net_set(sk, &init_net);
122 	spin_unlock(&sk->sk_lock.slock);
123 }
124 
125 /*
126  * Figure out, may we reply to this packet with icmp error.
127  *
128  * We do not reply, if:
129  *	- it was icmp error message.
130  *	- it is truncated, so that it is known, that protocol is ICMPV6
131  *	  (i.e. in the middle of some exthdr)
132  *
133  *	--ANK (980726)
134  */
135 
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 	int len = skb->len - ptr;
140 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 	__be16 frag_off;
142 
143 	if (len < 0)
144 		return true;
145 
146 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 	if (ptr < 0)
148 		return false;
149 	if (nexthdr == IPPROTO_ICMPV6) {
150 		u8 _type, *tp;
151 		tp = skb_header_pointer(skb,
152 			ptr+offsetof(struct icmp6hdr, icmp6_type),
153 			sizeof(_type), &_type);
154 
155 		/* Based on RFC 8200, Section 4.5 Fragment Header, return
156 		 * false if this is a fragment packet with no icmp header info.
157 		 */
158 		if (!tp && frag_off != 0)
159 			return false;
160 		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 			return true;
162 	}
163 	return false;
164 }
165 
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 	if (type > ICMPV6_MSG_MAX)
169 		return true;
170 
171 	/* Limit if icmp type is set in ratemask. */
172 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 		return true;
174 
175 	return false;
176 }
177 
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180 	if (icmpv6_mask_allow(net, type))
181 		return true;
182 
183 	if (icmp_global_allow())
184 		return true;
185 
186 	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	if (!res)
229 		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
230 				  ICMP6_MIB_RATELIMITHOST);
231 	dst_release(dst);
232 	return res;
233 }
234 
235 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
236 				  struct flowi6 *fl6)
237 {
238 	struct net *net = sock_net(sk);
239 	struct dst_entry *dst;
240 	bool res = false;
241 
242 	dst = ip6_route_output(net, sk, fl6);
243 	if (!dst->error) {
244 		struct rt6_info *rt = (struct rt6_info *)dst;
245 		struct in6_addr prefsrc;
246 
247 		rt6_get_prefsrc(rt, &prefsrc);
248 		res = !ipv6_addr_any(&prefsrc);
249 	}
250 	dst_release(dst);
251 	return res;
252 }
253 
254 /*
255  *	an inline helper for the "simple" if statement below
256  *	checks if parameter problem report is caused by an
257  *	unrecognized IPv6 option that has the Option Type
258  *	highest-order two bits set to 10
259  */
260 
261 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
262 {
263 	u8 _optval, *op;
264 
265 	offset += skb_network_offset(skb);
266 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
267 	if (!op)
268 		return true;
269 	return (*op & 0xC0) == 0x80;
270 }
271 
272 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
273 				struct icmp6hdr *thdr, int len)
274 {
275 	struct sk_buff *skb;
276 	struct icmp6hdr *icmp6h;
277 
278 	skb = skb_peek(&sk->sk_write_queue);
279 	if (!skb)
280 		return;
281 
282 	icmp6h = icmp6_hdr(skb);
283 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
284 	icmp6h->icmp6_cksum = 0;
285 
286 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
287 		skb->csum = csum_partial(icmp6h,
288 					sizeof(struct icmp6hdr), skb->csum);
289 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
290 						      &fl6->daddr,
291 						      len, fl6->flowi6_proto,
292 						      skb->csum);
293 	} else {
294 		__wsum tmp_csum = 0;
295 
296 		skb_queue_walk(&sk->sk_write_queue, skb) {
297 			tmp_csum = csum_add(tmp_csum, skb->csum);
298 		}
299 
300 		tmp_csum = csum_partial(icmp6h,
301 					sizeof(struct icmp6hdr), tmp_csum);
302 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
303 						      &fl6->daddr,
304 						      len, fl6->flowi6_proto,
305 						      tmp_csum);
306 	}
307 	ip6_push_pending_frames(sk);
308 }
309 
310 struct icmpv6_msg {
311 	struct sk_buff	*skb;
312 	int		offset;
313 	uint8_t		type;
314 };
315 
316 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
317 {
318 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
319 	struct sk_buff *org_skb = msg->skb;
320 	__wsum csum;
321 
322 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
323 				      to, len);
324 	skb->csum = csum_block_add(skb->csum, csum, odd);
325 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
326 		nf_ct_attach(skb, org_skb);
327 	return 0;
328 }
329 
330 #if IS_ENABLED(CONFIG_IPV6_MIP6)
331 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
332 {
333 	struct ipv6hdr *iph = ipv6_hdr(skb);
334 	struct ipv6_destopt_hao *hao;
335 	int off;
336 
337 	if (opt->dsthao) {
338 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
339 		if (likely(off >= 0)) {
340 			hao = (struct ipv6_destopt_hao *)
341 					(skb_network_header(skb) + off);
342 			swap(iph->saddr, hao->addr);
343 		}
344 	}
345 }
346 #else
347 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
348 #endif
349 
350 static struct dst_entry *icmpv6_route_lookup(struct net *net,
351 					     struct sk_buff *skb,
352 					     struct sock *sk,
353 					     struct flowi6 *fl6)
354 {
355 	struct dst_entry *dst, *dst2;
356 	struct flowi6 fl2;
357 	int err;
358 
359 	err = ip6_dst_lookup(net, sk, &dst, fl6);
360 	if (err)
361 		return ERR_PTR(err);
362 
363 	/*
364 	 * We won't send icmp if the destination is known
365 	 * anycast unless we need to treat anycast as unicast.
366 	 */
367 	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
368 	    ipv6_anycast_destination(dst, &fl6->daddr)) {
369 		net_dbg_ratelimited("icmp6_send: acast source\n");
370 		dst_release(dst);
371 		return ERR_PTR(-EINVAL);
372 	}
373 
374 	/* No need to clone since we're just using its address. */
375 	dst2 = dst;
376 
377 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
378 	if (!IS_ERR(dst)) {
379 		if (dst != dst2)
380 			return dst;
381 	} else {
382 		if (PTR_ERR(dst) == -EPERM)
383 			dst = NULL;
384 		else
385 			return dst;
386 	}
387 
388 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
389 	if (err)
390 		goto relookup_failed;
391 
392 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
393 	if (err)
394 		goto relookup_failed;
395 
396 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
397 	if (!IS_ERR(dst2)) {
398 		dst_release(dst);
399 		dst = dst2;
400 	} else {
401 		err = PTR_ERR(dst2);
402 		if (err == -EPERM) {
403 			dst_release(dst);
404 			return dst2;
405 		} else
406 			goto relookup_failed;
407 	}
408 
409 relookup_failed:
410 	if (dst)
411 		return dst;
412 	return ERR_PTR(err);
413 }
414 
415 static struct net_device *icmp6_dev(const struct sk_buff *skb)
416 {
417 	struct net_device *dev = skb->dev;
418 
419 	/* for local traffic to local address, skb dev is the loopback
420 	 * device. Check if there is a dst attached to the skb and if so
421 	 * get the real device index. Same is needed for replies to a link
422 	 * local address on a device enslaved to an L3 master device
423 	 */
424 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
425 		const struct rt6_info *rt6 = skb_rt6_info(skb);
426 
427 		/* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
428 		 * and ip6_null_entry could be set to skb if no route is found.
429 		 */
430 		if (rt6 && rt6->rt6i_idev)
431 			dev = rt6->rt6i_idev->dev;
432 	}
433 
434 	return dev;
435 }
436 
437 static int icmp6_iif(const struct sk_buff *skb)
438 {
439 	return icmp6_dev(skb)->ifindex;
440 }
441 
442 /*
443  *	Send an ICMP message in response to a packet in error
444  */
445 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
446 		const struct in6_addr *force_saddr,
447 		const struct inet6_skb_parm *parm)
448 {
449 	struct inet6_dev *idev = NULL;
450 	struct ipv6hdr *hdr = ipv6_hdr(skb);
451 	struct sock *sk;
452 	struct net *net;
453 	struct ipv6_pinfo *np;
454 	const struct in6_addr *saddr = NULL;
455 	struct dst_entry *dst;
456 	struct icmp6hdr tmp_hdr;
457 	struct flowi6 fl6;
458 	struct icmpv6_msg msg;
459 	struct ipcm6_cookie ipc6;
460 	int iif = 0;
461 	int addr_type = 0;
462 	int len;
463 	u32 mark;
464 
465 	if ((u8 *)hdr < skb->head ||
466 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
467 		return;
468 
469 	if (!skb->dev)
470 		return;
471 	net = dev_net(skb->dev);
472 	mark = IP6_REPLY_MARK(net, skb->mark);
473 	/*
474 	 *	Make sure we respect the rules
475 	 *	i.e. RFC 1885 2.4(e)
476 	 *	Rule (e.1) is enforced by not using icmp6_send
477 	 *	in any code that processes icmp errors.
478 	 */
479 	addr_type = ipv6_addr_type(&hdr->daddr);
480 
481 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
482 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
483 		saddr = &hdr->daddr;
484 
485 	/*
486 	 *	Dest addr check
487 	 */
488 
489 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
490 		if (type != ICMPV6_PKT_TOOBIG &&
491 		    !(type == ICMPV6_PARAMPROB &&
492 		      code == ICMPV6_UNK_OPTION &&
493 		      (opt_unrec(skb, info))))
494 			return;
495 
496 		saddr = NULL;
497 	}
498 
499 	addr_type = ipv6_addr_type(&hdr->saddr);
500 
501 	/*
502 	 *	Source addr check
503 	 */
504 
505 	if (__ipv6_addr_needs_scope_id(addr_type)) {
506 		iif = icmp6_iif(skb);
507 	} else {
508 		/*
509 		 * The source device is used for looking up which routing table
510 		 * to use for sending an ICMP error.
511 		 */
512 		iif = l3mdev_master_ifindex(skb->dev);
513 	}
514 
515 	/*
516 	 *	Must not send error if the source does not uniquely
517 	 *	identify a single node (RFC2463 Section 2.4).
518 	 *	We check unspecified / multicast addresses here,
519 	 *	and anycast addresses will be checked later.
520 	 */
521 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
522 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
523 				    &hdr->saddr, &hdr->daddr);
524 		return;
525 	}
526 
527 	/*
528 	 *	Never answer to a ICMP packet.
529 	 */
530 	if (is_ineligible(skb)) {
531 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
532 				    &hdr->saddr, &hdr->daddr);
533 		return;
534 	}
535 
536 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
537 	local_bh_disable();
538 
539 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
540 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
541 		goto out_bh_enable;
542 
543 	mip6_addr_swap(skb, parm);
544 
545 	sk = icmpv6_xmit_lock(net);
546 	if (!sk)
547 		goto out_bh_enable;
548 
549 	memset(&fl6, 0, sizeof(fl6));
550 	fl6.flowi6_proto = IPPROTO_ICMPV6;
551 	fl6.daddr = hdr->saddr;
552 	if (force_saddr)
553 		saddr = force_saddr;
554 	if (saddr) {
555 		fl6.saddr = *saddr;
556 	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
557 		/* select a more meaningful saddr from input if */
558 		struct net_device *in_netdev;
559 
560 		in_netdev = dev_get_by_index(net, parm->iif);
561 		if (in_netdev) {
562 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
563 					   inet6_sk(sk)->srcprefs,
564 					   &fl6.saddr);
565 			dev_put(in_netdev);
566 		}
567 	}
568 	fl6.flowi6_mark = mark;
569 	fl6.flowi6_oif = iif;
570 	fl6.fl6_icmp_type = type;
571 	fl6.fl6_icmp_code = code;
572 	fl6.flowi6_uid = sock_net_uid(net, NULL);
573 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
574 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
575 
576 	np = inet6_sk(sk);
577 
578 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
579 		goto out;
580 
581 	tmp_hdr.icmp6_type = type;
582 	tmp_hdr.icmp6_code = code;
583 	tmp_hdr.icmp6_cksum = 0;
584 	tmp_hdr.icmp6_pointer = htonl(info);
585 
586 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
587 		fl6.flowi6_oif = np->mcast_oif;
588 	else if (!fl6.flowi6_oif)
589 		fl6.flowi6_oif = np->ucast_oif;
590 
591 	ipcm6_init_sk(&ipc6, np);
592 	ipc6.sockc.mark = mark;
593 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
594 
595 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
596 	if (IS_ERR(dst))
597 		goto out;
598 
599 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
600 
601 	msg.skb = skb;
602 	msg.offset = skb_network_offset(skb);
603 	msg.type = type;
604 
605 	len = skb->len - msg.offset;
606 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
607 	if (len < 0) {
608 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
609 				    &hdr->saddr, &hdr->daddr);
610 		goto out_dst_release;
611 	}
612 
613 	rcu_read_lock();
614 	idev = __in6_dev_get(skb->dev);
615 
616 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
617 			    len + sizeof(struct icmp6hdr),
618 			    sizeof(struct icmp6hdr),
619 			    &ipc6, &fl6, (struct rt6_info *)dst,
620 			    MSG_DONTWAIT)) {
621 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
622 		ip6_flush_pending_frames(sk);
623 	} else {
624 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
625 					   len + sizeof(struct icmp6hdr));
626 	}
627 	rcu_read_unlock();
628 out_dst_release:
629 	dst_release(dst);
630 out:
631 	icmpv6_xmit_unlock(sk);
632 out_bh_enable:
633 	local_bh_enable();
634 }
635 EXPORT_SYMBOL(icmp6_send);
636 
637 /* Slightly more convenient version of icmp6_send with drop reasons.
638  */
639 void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
640 			      enum skb_drop_reason reason)
641 {
642 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
643 	kfree_skb_reason(skb, reason);
644 }
645 
646 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
647  * if sufficient data bytes are available
648  * @nhs is the size of the tunnel header(s) :
649  *  Either an IPv4 header for SIT encap
650  *         an IPv4 header + GRE header for GRE encap
651  */
652 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
653 			       unsigned int data_len)
654 {
655 	struct in6_addr temp_saddr;
656 	struct rt6_info *rt;
657 	struct sk_buff *skb2;
658 	u32 info = 0;
659 
660 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
661 		return 1;
662 
663 	/* RFC 4884 (partial) support for ICMP extensions */
664 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
665 		data_len = 0;
666 
667 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
668 
669 	if (!skb2)
670 		return 1;
671 
672 	skb_dst_drop(skb2);
673 	skb_pull(skb2, nhs);
674 	skb_reset_network_header(skb2);
675 
676 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
677 			skb, 0);
678 
679 	if (rt && rt->dst.dev)
680 		skb2->dev = rt->dst.dev;
681 
682 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
683 
684 	if (data_len) {
685 		/* RFC 4884 (partial) support :
686 		 * insert 0 padding at the end, before the extensions
687 		 */
688 		__skb_push(skb2, nhs);
689 		skb_reset_network_header(skb2);
690 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
691 		memset(skb2->data + data_len - nhs, 0, nhs);
692 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
693 		 * and stored in reserved[0]
694 		 */
695 		info = (data_len/8) << 24;
696 	}
697 	if (type == ICMP_TIME_EXCEEDED)
698 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
699 			   info, &temp_saddr, IP6CB(skb2));
700 	else
701 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
702 			   info, &temp_saddr, IP6CB(skb2));
703 	if (rt)
704 		ip6_rt_put(rt);
705 
706 	kfree_skb(skb2);
707 
708 	return 0;
709 }
710 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
711 
712 static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
713 {
714 	struct net *net = dev_net(skb->dev);
715 	struct sock *sk;
716 	struct inet6_dev *idev;
717 	struct ipv6_pinfo *np;
718 	const struct in6_addr *saddr = NULL;
719 	struct icmp6hdr *icmph = icmp6_hdr(skb);
720 	struct icmp6hdr tmp_hdr;
721 	struct flowi6 fl6;
722 	struct icmpv6_msg msg;
723 	struct dst_entry *dst;
724 	struct ipcm6_cookie ipc6;
725 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
726 	SKB_DR(reason);
727 	bool acast;
728 	u8 type;
729 
730 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
731 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
732 		return reason;
733 
734 	saddr = &ipv6_hdr(skb)->daddr;
735 
736 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
737 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
738 		return reason;
739 
740 	if (!ipv6_unicast_destination(skb) &&
741 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
742 		saddr = NULL;
743 
744 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
745 		type = ICMPV6_EXT_ECHO_REPLY;
746 	else
747 		type = ICMPV6_ECHO_REPLY;
748 
749 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
750 	tmp_hdr.icmp6_type = type;
751 
752 	memset(&fl6, 0, sizeof(fl6));
753 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
754 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
755 
756 	fl6.flowi6_proto = IPPROTO_ICMPV6;
757 	fl6.daddr = ipv6_hdr(skb)->saddr;
758 	if (saddr)
759 		fl6.saddr = *saddr;
760 	fl6.flowi6_oif = icmp6_iif(skb);
761 	fl6.fl6_icmp_type = type;
762 	fl6.flowi6_mark = mark;
763 	fl6.flowi6_uid = sock_net_uid(net, NULL);
764 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
765 
766 	local_bh_disable();
767 	sk = icmpv6_xmit_lock(net);
768 	if (!sk)
769 		goto out_bh_enable;
770 	np = inet6_sk(sk);
771 
772 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
773 		fl6.flowi6_oif = np->mcast_oif;
774 	else if (!fl6.flowi6_oif)
775 		fl6.flowi6_oif = np->ucast_oif;
776 
777 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
778 		goto out;
779 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
780 	if (IS_ERR(dst))
781 		goto out;
782 
783 	/* Check the ratelimit */
784 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
785 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
786 		goto out_dst_release;
787 
788 	idev = __in6_dev_get(skb->dev);
789 
790 	msg.skb = skb;
791 	msg.offset = 0;
792 	msg.type = type;
793 
794 	ipcm6_init_sk(&ipc6, np);
795 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
796 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
797 	ipc6.sockc.mark = mark;
798 
799 	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
800 		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
801 			goto out_dst_release;
802 
803 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
804 			    skb->len + sizeof(struct icmp6hdr),
805 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
806 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
807 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
808 		ip6_flush_pending_frames(sk);
809 	} else {
810 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
811 					   skb->len + sizeof(struct icmp6hdr));
812 		reason = SKB_CONSUMED;
813 	}
814 out_dst_release:
815 	dst_release(dst);
816 out:
817 	icmpv6_xmit_unlock(sk);
818 out_bh_enable:
819 	local_bh_enable();
820 	return reason;
821 }
822 
823 enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
824 				   u8 code, __be32 info)
825 {
826 	struct inet6_skb_parm *opt = IP6CB(skb);
827 	struct net *net = dev_net(skb->dev);
828 	const struct inet6_protocol *ipprot;
829 	enum skb_drop_reason reason;
830 	int inner_offset;
831 	__be16 frag_off;
832 	u8 nexthdr;
833 
834 	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
835 	if (reason != SKB_NOT_DROPPED_YET)
836 		goto out;
837 
838 	seg6_icmp_srh(skb, opt);
839 
840 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
841 	if (ipv6_ext_hdr(nexthdr)) {
842 		/* now skip over extension headers */
843 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
844 						&nexthdr, &frag_off);
845 		if (inner_offset < 0) {
846 			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
847 			goto out;
848 		}
849 	} else {
850 		inner_offset = sizeof(struct ipv6hdr);
851 	}
852 
853 	/* Checkin header including 8 bytes of inner protocol header. */
854 	reason = pskb_may_pull_reason(skb, inner_offset + 8);
855 	if (reason != SKB_NOT_DROPPED_YET)
856 		goto out;
857 
858 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
859 	   Without this we will not able f.e. to make source routed
860 	   pmtu discovery.
861 	   Corresponding argument (opt) to notifiers is already added.
862 	   --ANK (980726)
863 	 */
864 
865 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
866 	if (ipprot && ipprot->err_handler)
867 		ipprot->err_handler(skb, opt, type, code, inner_offset, info);
868 
869 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
870 	return SKB_CONSUMED;
871 
872 out:
873 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
874 	return reason;
875 }
876 
877 /*
878  *	Handle icmp messages
879  */
880 
881 static int icmpv6_rcv(struct sk_buff *skb)
882 {
883 	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
884 	struct net *net = dev_net(skb->dev);
885 	struct net_device *dev = icmp6_dev(skb);
886 	struct inet6_dev *idev = __in6_dev_get(dev);
887 	const struct in6_addr *saddr, *daddr;
888 	struct icmp6hdr *hdr;
889 	u8 type;
890 
891 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
892 		struct sec_path *sp = skb_sec_path(skb);
893 		int nh;
894 
895 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
896 				 XFRM_STATE_ICMP)) {
897 			reason = SKB_DROP_REASON_XFRM_POLICY;
898 			goto drop_no_count;
899 		}
900 
901 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
902 			goto drop_no_count;
903 
904 		nh = skb_network_offset(skb);
905 		skb_set_network_header(skb, sizeof(*hdr));
906 
907 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
908 						skb)) {
909 			reason = SKB_DROP_REASON_XFRM_POLICY;
910 			goto drop_no_count;
911 		}
912 
913 		skb_set_network_header(skb, nh);
914 	}
915 
916 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
917 
918 	saddr = &ipv6_hdr(skb)->saddr;
919 	daddr = &ipv6_hdr(skb)->daddr;
920 
921 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
922 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
923 				    saddr, daddr);
924 		goto csum_error;
925 	}
926 
927 	if (!pskb_pull(skb, sizeof(*hdr)))
928 		goto discard_it;
929 
930 	hdr = icmp6_hdr(skb);
931 
932 	type = hdr->icmp6_type;
933 
934 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
935 
936 	switch (type) {
937 	case ICMPV6_ECHO_REQUEST:
938 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
939 			reason = icmpv6_echo_reply(skb);
940 		break;
941 	case ICMPV6_EXT_ECHO_REQUEST:
942 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
943 		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
944 			reason = icmpv6_echo_reply(skb);
945 		break;
946 
947 	case ICMPV6_ECHO_REPLY:
948 		reason = ping_rcv(skb);
949 		break;
950 
951 	case ICMPV6_EXT_ECHO_REPLY:
952 		reason = ping_rcv(skb);
953 		break;
954 
955 	case ICMPV6_PKT_TOOBIG:
956 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
957 		   standard destination cache. Seems, only "advanced"
958 		   destination cache will allow to solve this problem
959 		   --ANK (980726)
960 		 */
961 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
962 			goto discard_it;
963 		hdr = icmp6_hdr(skb);
964 
965 		/* to notify */
966 		fallthrough;
967 	case ICMPV6_DEST_UNREACH:
968 	case ICMPV6_TIME_EXCEED:
969 	case ICMPV6_PARAMPROB:
970 		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
971 				       hdr->icmp6_mtu);
972 		break;
973 
974 	case NDISC_ROUTER_SOLICITATION:
975 	case NDISC_ROUTER_ADVERTISEMENT:
976 	case NDISC_NEIGHBOUR_SOLICITATION:
977 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
978 	case NDISC_REDIRECT:
979 		reason = ndisc_rcv(skb);
980 		break;
981 
982 	case ICMPV6_MGM_QUERY:
983 		igmp6_event_query(skb);
984 		return 0;
985 
986 	case ICMPV6_MGM_REPORT:
987 		igmp6_event_report(skb);
988 		return 0;
989 
990 	case ICMPV6_MGM_REDUCTION:
991 	case ICMPV6_NI_QUERY:
992 	case ICMPV6_NI_REPLY:
993 	case ICMPV6_MLD2_REPORT:
994 	case ICMPV6_DHAAD_REQUEST:
995 	case ICMPV6_DHAAD_REPLY:
996 	case ICMPV6_MOBILE_PREFIX_SOL:
997 	case ICMPV6_MOBILE_PREFIX_ADV:
998 		break;
999 
1000 	default:
1001 		/* informational */
1002 		if (type & ICMPV6_INFOMSG_MASK)
1003 			break;
1004 
1005 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
1006 				    saddr, daddr);
1007 
1008 		/*
1009 		 * error of unknown type.
1010 		 * must pass to upper level
1011 		 */
1012 
1013 		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
1014 				       hdr->icmp6_mtu);
1015 	}
1016 
1017 	/* until the v6 path can be better sorted assume failure and
1018 	 * preserve the status quo behaviour for the rest of the paths to here
1019 	 */
1020 	if (reason)
1021 		kfree_skb_reason(skb, reason);
1022 	else
1023 		consume_skb(skb);
1024 
1025 	return 0;
1026 
1027 csum_error:
1028 	reason = SKB_DROP_REASON_ICMP_CSUM;
1029 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1030 discard_it:
1031 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1032 drop_no_count:
1033 	kfree_skb_reason(skb, reason);
1034 	return 0;
1035 }
1036 
1037 void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
1038 		      const struct in6_addr *saddr,
1039 		      const struct in6_addr *daddr, int oif)
1040 {
1041 	memset(fl6, 0, sizeof(*fl6));
1042 	fl6->saddr = *saddr;
1043 	fl6->daddr = *daddr;
1044 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1045 	fl6->fl6_icmp_type	= type;
1046 	fl6->fl6_icmp_code	= 0;
1047 	fl6->flowi6_oif		= oif;
1048 	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1049 }
1050 
1051 int __init icmpv6_init(void)
1052 {
1053 	struct sock *sk;
1054 	int err, i;
1055 
1056 	for_each_possible_cpu(i) {
1057 		err = inet_ctl_sock_create(&sk, PF_INET6,
1058 					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1059 		if (err < 0) {
1060 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1061 			       err);
1062 			return err;
1063 		}
1064 
1065 		per_cpu(ipv6_icmp_sk, i) = sk;
1066 
1067 		/* Enough space for 2 64K ICMP packets, including
1068 		 * sk_buff struct overhead.
1069 		 */
1070 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1071 	}
1072 
1073 	err = -EAGAIN;
1074 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1075 		goto fail;
1076 
1077 	err = inet6_register_icmp_sender(icmp6_send);
1078 	if (err)
1079 		goto sender_reg_err;
1080 	return 0;
1081 
1082 sender_reg_err:
1083 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1084 fail:
1085 	pr_err("Failed to register ICMP6 protocol\n");
1086 	return err;
1087 }
1088 
/*
 * Undo the registrations made by icmpv6_init(), in reverse order:
 * unregister icmp6_send() as the ICMP sender, then remove the
 * IPPROTO_ICMPV6 protocol entry.
 */
1089 void icmpv6_cleanup(void)
1090 {
1091 	inet6_unregister_icmp_sender(icmp6_send);
1092 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1093 }
1094 
1095 
/*
 * Destination Unreachable codes mapped to an errno and a "fatal" flag.
 * Indexed by ICMPv6 code; consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1129 
1130 int icmpv6_err_convert(u8 type, u8 code, int *err)
1131 {
1132 	int fatal = 0;
1133 
1134 	*err = EPROTO;
1135 
1136 	switch (type) {
1137 	case ICMPV6_DEST_UNREACH:
1138 		fatal = 1;
1139 		if (code < ARRAY_SIZE(tab_unreach)) {
1140 			*err  = tab_unreach[code].err;
1141 			fatal = tab_unreach[code].fatal;
1142 		}
1143 		break;
1144 
1145 	case ICMPV6_PKT_TOOBIG:
1146 		*err = EMSGSIZE;
1147 		break;
1148 
1149 	case ICMPV6_PARAMPROB:
1150 		*err = EPROTO;
1151 		fatal = 1;
1152 		break;
1153 
1154 	case ICMPV6_TIME_EXCEED:
1155 		*err = EHOSTUNREACH;
1156 		break;
1157 	}
1158 
1159 	return fatal;
1160 }
1161 EXPORT_SYMBOL(icmpv6_err_convert);
1162 
1163 #ifdef CONFIG_SYSCTL
1164 static struct ctl_table ipv6_icmp_table_template[] = {
1165 	{
1166 		.procname	= "ratelimit",
1167 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1168 		.maxlen		= sizeof(int),
1169 		.mode		= 0644,
1170 		.proc_handler	= proc_dointvec_ms_jiffies,
1171 	},
1172 	{
1173 		.procname	= "echo_ignore_all",
1174 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1175 		.maxlen		= sizeof(u8),
1176 		.mode		= 0644,
1177 		.proc_handler = proc_dou8vec_minmax,
1178 	},
1179 	{
1180 		.procname	= "echo_ignore_multicast",
1181 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1182 		.maxlen		= sizeof(u8),
1183 		.mode		= 0644,
1184 		.proc_handler = proc_dou8vec_minmax,
1185 	},
1186 	{
1187 		.procname	= "echo_ignore_anycast",
1188 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1189 		.maxlen		= sizeof(u8),
1190 		.mode		= 0644,
1191 		.proc_handler = proc_dou8vec_minmax,
1192 	},
1193 	{
1194 		.procname	= "ratemask",
1195 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1196 		.maxlen		= ICMPV6_MSG_MAX + 1,
1197 		.mode		= 0644,
1198 		.proc_handler = proc_do_large_bitmap,
1199 	},
1200 	{
1201 		.procname	= "error_anycast_as_unicast",
1202 		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
1203 		.maxlen		= sizeof(u8),
1204 		.mode		= 0644,
1205 		.proc_handler	= proc_dou8vec_minmax,
1206 		.extra1		= SYSCTL_ZERO,
1207 		.extra2		= SYSCTL_ONE,
1208 	},
1209 	{ },
1210 };
1211 
1212 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1213 {
1214 	struct ctl_table *table;
1215 
1216 	table = kmemdup(ipv6_icmp_table_template,
1217 			sizeof(ipv6_icmp_table_template),
1218 			GFP_KERNEL);
1219 
1220 	if (table) {
1221 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1222 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1223 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1224 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1225 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1226 		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
1227 	}
1228 	return table;
1229 }
1230 #endif
1231