xref: /openbmc/linux/net/ipv6/icmp.c (revision e1ae5c2e)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/transp_v6.h>
61 #include <net/ip6_route.h>
62 #include <net/addrconf.h>
63 #include <net/icmp.h>
64 #include <net/xfrm.h>
65 #include <net/inet_common.h>
66 #include <net/dsfield.h>
67 #include <net/l3mdev.h>
68 
69 #include <linux/uaccess.h>
70 
71 /*
72  *	The ICMP socket(s). This is the most convenient way to flow control
73  *	our ICMP output as well as maintain a clean interface throughout
74  *	all layers. All Socketless IP sends will soon be gone.
75  *
76  *	On SMP we have one ICMP socket per-cpu.
77  */
78 static inline struct sock *icmpv6_sk(struct net *net)
79 {
80 	return *this_cpu_ptr(net->ipv6.icmp_sk);
81 }
82 
83 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
84 		       u8 type, u8 code, int offset, __be32 info)
85 {
86 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
87 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
88 	struct net *net = dev_net(skb->dev);
89 
90 	if (type == ICMPV6_PKT_TOOBIG)
91 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
92 	else if (type == NDISC_REDIRECT)
93 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
94 			     sock_net_uid(net, NULL));
95 
96 	if (!(type & ICMPV6_INFOMSG_MASK))
97 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
98 			ping_err(skb, offset, ntohl(info));
99 
100 	return 0;
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 /* Called with BH disabled */
112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113 {
114 	struct sock *sk;
115 
116 	sk = icmpv6_sk(net);
117 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118 		/* This can happen if the output path (f.e. SIT or
119 		 * ip6ip6 tunnel) signals dst_link_failure() for an
120 		 * outgoing ICMP6 packet.
121 		 */
122 		return NULL;
123 	}
124 	return sk;
125 }
126 
127 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
128 {
129 	spin_unlock(&sk->sk_lock.slock);
130 }
131 
132 /*
133  * Figure out, may we reply to this packet with icmp error.
134  *
135  * We do not reply, if:
136  *	- it was icmp error message.
137  *	- it is truncated, so that it is known, that protocol is ICMPV6
138  *	  (i.e. in the middle of some exthdr)
139  *
140  *	--ANK (980726)
141  */
142 
143 static bool is_ineligible(const struct sk_buff *skb)
144 {
145 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
146 	int len = skb->len - ptr;
147 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
148 	__be16 frag_off;
149 
150 	if (len < 0)
151 		return true;
152 
153 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
154 	if (ptr < 0)
155 		return false;
156 	if (nexthdr == IPPROTO_ICMPV6) {
157 		u8 _type, *tp;
158 		tp = skb_header_pointer(skb,
159 			ptr+offsetof(struct icmp6hdr, icmp6_type),
160 			sizeof(_type), &_type);
161 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
162 			return true;
163 	}
164 	return false;
165 }
166 
167 static bool icmpv6_mask_allow(struct net *net, int type)
168 {
169 	if (type > ICMPV6_MSG_MAX)
170 		return true;
171 
172 	/* Limit if icmp type is set in ratemask. */
173 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
174 		return true;
175 
176 	return false;
177 }
178 
179 static bool icmpv6_global_allow(struct net *net, int type)
180 {
181 	if (icmpv6_mask_allow(net, type))
182 		return true;
183 
184 	if (icmp_global_allow())
185 		return true;
186 
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	dst_release(dst);
229 	return res;
230 }
231 
232 /*
233  *	an inline helper for the "simple" if statement below
234  *	checks if parameter problem report is caused by an
235  *	unrecognized IPv6 option that has the Option Type
236  *	highest-order two bits set to 10
237  */
238 
239 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
240 {
241 	u8 _optval, *op;
242 
243 	offset += skb_network_offset(skb);
244 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
245 	if (!op)
246 		return true;
247 	return (*op & 0xC0) == 0x80;
248 }
249 
250 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
251 				struct icmp6hdr *thdr, int len)
252 {
253 	struct sk_buff *skb;
254 	struct icmp6hdr *icmp6h;
255 
256 	skb = skb_peek(&sk->sk_write_queue);
257 	if (!skb)
258 		return;
259 
260 	icmp6h = icmp6_hdr(skb);
261 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
262 	icmp6h->icmp6_cksum = 0;
263 
264 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
265 		skb->csum = csum_partial(icmp6h,
266 					sizeof(struct icmp6hdr), skb->csum);
267 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
268 						      &fl6->daddr,
269 						      len, fl6->flowi6_proto,
270 						      skb->csum);
271 	} else {
272 		__wsum tmp_csum = 0;
273 
274 		skb_queue_walk(&sk->sk_write_queue, skb) {
275 			tmp_csum = csum_add(tmp_csum, skb->csum);
276 		}
277 
278 		tmp_csum = csum_partial(icmp6h,
279 					sizeof(struct icmp6hdr), tmp_csum);
280 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
281 						      &fl6->daddr,
282 						      len, fl6->flowi6_proto,
283 						      tmp_csum);
284 	}
285 	ip6_push_pending_frames(sk);
286 }
287 
288 struct icmpv6_msg {
289 	struct sk_buff	*skb;
290 	int		offset;
291 	uint8_t		type;
292 };
293 
294 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
295 {
296 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
297 	struct sk_buff *org_skb = msg->skb;
298 	__wsum csum = 0;
299 
300 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
301 				      to, len, csum);
302 	skb->csum = csum_block_add(skb->csum, csum, odd);
303 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
304 		nf_ct_attach(skb, org_skb);
305 	return 0;
306 }
307 
308 #if IS_ENABLED(CONFIG_IPV6_MIP6)
309 static void mip6_addr_swap(struct sk_buff *skb)
310 {
311 	struct ipv6hdr *iph = ipv6_hdr(skb);
312 	struct inet6_skb_parm *opt = IP6CB(skb);
313 	struct ipv6_destopt_hao *hao;
314 	struct in6_addr tmp;
315 	int off;
316 
317 	if (opt->dsthao) {
318 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
319 		if (likely(off >= 0)) {
320 			hao = (struct ipv6_destopt_hao *)
321 					(skb_network_header(skb) + off);
322 			tmp = iph->saddr;
323 			iph->saddr = hao->addr;
324 			hao->addr = tmp;
325 		}
326 	}
327 }
328 #else
329 static inline void mip6_addr_swap(struct sk_buff *skb) {}
330 #endif
331 
332 static struct dst_entry *icmpv6_route_lookup(struct net *net,
333 					     struct sk_buff *skb,
334 					     struct sock *sk,
335 					     struct flowi6 *fl6)
336 {
337 	struct dst_entry *dst, *dst2;
338 	struct flowi6 fl2;
339 	int err;
340 
341 	err = ip6_dst_lookup(net, sk, &dst, fl6);
342 	if (err)
343 		return ERR_PTR(err);
344 
345 	/*
346 	 * We won't send icmp if the destination is known
347 	 * anycast.
348 	 */
349 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
350 		net_dbg_ratelimited("icmp6_send: acast source\n");
351 		dst_release(dst);
352 		return ERR_PTR(-EINVAL);
353 	}
354 
355 	/* No need to clone since we're just using its address. */
356 	dst2 = dst;
357 
358 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
359 	if (!IS_ERR(dst)) {
360 		if (dst != dst2)
361 			return dst;
362 	} else {
363 		if (PTR_ERR(dst) == -EPERM)
364 			dst = NULL;
365 		else
366 			return dst;
367 	}
368 
369 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
370 	if (err)
371 		goto relookup_failed;
372 
373 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
374 	if (err)
375 		goto relookup_failed;
376 
377 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
378 	if (!IS_ERR(dst2)) {
379 		dst_release(dst);
380 		dst = dst2;
381 	} else {
382 		err = PTR_ERR(dst2);
383 		if (err == -EPERM) {
384 			dst_release(dst);
385 			return dst2;
386 		} else
387 			goto relookup_failed;
388 	}
389 
390 relookup_failed:
391 	if (dst)
392 		return dst;
393 	return ERR_PTR(err);
394 }
395 
396 static struct net_device *icmp6_dev(const struct sk_buff *skb)
397 {
398 	struct net_device *dev = skb->dev;
399 
400 	/* for local traffic to local address, skb dev is the loopback
401 	 * device. Check if there is a dst attached to the skb and if so
402 	 * get the real device index. Same is needed for replies to a link
403 	 * local address on a device enslaved to an L3 master device
404 	 */
405 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
406 		const struct rt6_info *rt6 = skb_rt6_info(skb);
407 
408 		if (rt6)
409 			dev = rt6->rt6i_idev->dev;
410 	}
411 
412 	return dev;
413 }
414 
415 static int icmp6_iif(const struct sk_buff *skb)
416 {
417 	return icmp6_dev(skb)->ifindex;
418 }
419 
420 /*
421  *	Send an ICMP message in response to a packet in error
422  */
423 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
424 		       const struct in6_addr *force_saddr)
425 {
426 	struct inet6_dev *idev = NULL;
427 	struct ipv6hdr *hdr = ipv6_hdr(skb);
428 	struct sock *sk;
429 	struct net *net;
430 	struct ipv6_pinfo *np;
431 	const struct in6_addr *saddr = NULL;
432 	struct dst_entry *dst;
433 	struct icmp6hdr tmp_hdr;
434 	struct flowi6 fl6;
435 	struct icmpv6_msg msg;
436 	struct ipcm6_cookie ipc6;
437 	int iif = 0;
438 	int addr_type = 0;
439 	int len;
440 	u32 mark;
441 
442 	if ((u8 *)hdr < skb->head ||
443 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
444 		return;
445 
446 	if (!skb->dev)
447 		return;
448 	net = dev_net(skb->dev);
449 	mark = IP6_REPLY_MARK(net, skb->mark);
450 	/*
451 	 *	Make sure we respect the rules
452 	 *	i.e. RFC 1885 2.4(e)
453 	 *	Rule (e.1) is enforced by not using icmp6_send
454 	 *	in any code that processes icmp errors.
455 	 */
456 	addr_type = ipv6_addr_type(&hdr->daddr);
457 
458 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
459 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
460 		saddr = &hdr->daddr;
461 
462 	/*
463 	 *	Dest addr check
464 	 */
465 
466 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
467 		if (type != ICMPV6_PKT_TOOBIG &&
468 		    !(type == ICMPV6_PARAMPROB &&
469 		      code == ICMPV6_UNK_OPTION &&
470 		      (opt_unrec(skb, info))))
471 			return;
472 
473 		saddr = NULL;
474 	}
475 
476 	addr_type = ipv6_addr_type(&hdr->saddr);
477 
478 	/*
479 	 *	Source addr check
480 	 */
481 
482 	if (__ipv6_addr_needs_scope_id(addr_type)) {
483 		iif = icmp6_iif(skb);
484 	} else {
485 		dst = skb_dst(skb);
486 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
487 	}
488 
489 	/*
490 	 *	Must not send error if the source does not uniquely
491 	 *	identify a single node (RFC2463 Section 2.4).
492 	 *	We check unspecified / multicast addresses here,
493 	 *	and anycast addresses will be checked later.
494 	 */
495 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
496 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
497 				    &hdr->saddr, &hdr->daddr);
498 		return;
499 	}
500 
501 	/*
502 	 *	Never answer to a ICMP packet.
503 	 */
504 	if (is_ineligible(skb)) {
505 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
506 				    &hdr->saddr, &hdr->daddr);
507 		return;
508 	}
509 
510 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
511 	local_bh_disable();
512 
513 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
514 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
515 		goto out_bh_enable;
516 
517 	mip6_addr_swap(skb);
518 
519 	memset(&fl6, 0, sizeof(fl6));
520 	fl6.flowi6_proto = IPPROTO_ICMPV6;
521 	fl6.daddr = hdr->saddr;
522 	if (force_saddr)
523 		saddr = force_saddr;
524 	if (saddr)
525 		fl6.saddr = *saddr;
526 	fl6.flowi6_mark = mark;
527 	fl6.flowi6_oif = iif;
528 	fl6.fl6_icmp_type = type;
529 	fl6.fl6_icmp_code = code;
530 	fl6.flowi6_uid = sock_net_uid(net, NULL);
531 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
532 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
533 
534 	sk = icmpv6_xmit_lock(net);
535 	if (!sk)
536 		goto out_bh_enable;
537 
538 	sk->sk_mark = mark;
539 	np = inet6_sk(sk);
540 
541 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
542 		goto out;
543 
544 	tmp_hdr.icmp6_type = type;
545 	tmp_hdr.icmp6_code = code;
546 	tmp_hdr.icmp6_cksum = 0;
547 	tmp_hdr.icmp6_pointer = htonl(info);
548 
549 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
550 		fl6.flowi6_oif = np->mcast_oif;
551 	else if (!fl6.flowi6_oif)
552 		fl6.flowi6_oif = np->ucast_oif;
553 
554 	ipcm6_init_sk(&ipc6, np);
555 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
556 
557 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
558 	if (IS_ERR(dst))
559 		goto out;
560 
561 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
562 
563 	msg.skb = skb;
564 	msg.offset = skb_network_offset(skb);
565 	msg.type = type;
566 
567 	len = skb->len - msg.offset;
568 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
569 	if (len < 0) {
570 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
571 				    &hdr->saddr, &hdr->daddr);
572 		goto out_dst_release;
573 	}
574 
575 	rcu_read_lock();
576 	idev = __in6_dev_get(skb->dev);
577 
578 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
579 			    len + sizeof(struct icmp6hdr),
580 			    sizeof(struct icmp6hdr),
581 			    &ipc6, &fl6, (struct rt6_info *)dst,
582 			    MSG_DONTWAIT)) {
583 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
584 		ip6_flush_pending_frames(sk);
585 	} else {
586 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
587 					   len + sizeof(struct icmp6hdr));
588 	}
589 	rcu_read_unlock();
590 out_dst_release:
591 	dst_release(dst);
592 out:
593 	icmpv6_xmit_unlock(sk);
594 out_bh_enable:
595 	local_bh_enable();
596 }
597 
598 /* Slightly more convenient version of icmp6_send.
599  */
600 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
601 {
602 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
603 	kfree_skb(skb);
604 }
605 
606 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
607  * if sufficient data bytes are available
608  * @nhs is the size of the tunnel header(s) :
609  *  Either an IPv4 header for SIT encap
610  *         an IPv4 header + GRE header for GRE encap
611  */
612 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
613 			       unsigned int data_len)
614 {
615 	struct in6_addr temp_saddr;
616 	struct rt6_info *rt;
617 	struct sk_buff *skb2;
618 	u32 info = 0;
619 
620 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
621 		return 1;
622 
623 	/* RFC 4884 (partial) support for ICMP extensions */
624 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
625 		data_len = 0;
626 
627 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
628 
629 	if (!skb2)
630 		return 1;
631 
632 	skb_dst_drop(skb2);
633 	skb_pull(skb2, nhs);
634 	skb_reset_network_header(skb2);
635 
636 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
637 			skb, 0);
638 
639 	if (rt && rt->dst.dev)
640 		skb2->dev = rt->dst.dev;
641 
642 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
643 
644 	if (data_len) {
645 		/* RFC 4884 (partial) support :
646 		 * insert 0 padding at the end, before the extensions
647 		 */
648 		__skb_push(skb2, nhs);
649 		skb_reset_network_header(skb2);
650 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
651 		memset(skb2->data + data_len - nhs, 0, nhs);
652 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
653 		 * and stored in reserved[0]
654 		 */
655 		info = (data_len/8) << 24;
656 	}
657 	if (type == ICMP_TIME_EXCEEDED)
658 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
659 			   info, &temp_saddr);
660 	else
661 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
662 			   info, &temp_saddr);
663 	if (rt)
664 		ip6_rt_put(rt);
665 
666 	kfree_skb(skb2);
667 
668 	return 0;
669 }
670 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
671 
672 static void icmpv6_echo_reply(struct sk_buff *skb)
673 {
674 	struct net *net = dev_net(skb->dev);
675 	struct sock *sk;
676 	struct inet6_dev *idev;
677 	struct ipv6_pinfo *np;
678 	const struct in6_addr *saddr = NULL;
679 	struct icmp6hdr *icmph = icmp6_hdr(skb);
680 	struct icmp6hdr tmp_hdr;
681 	struct flowi6 fl6;
682 	struct icmpv6_msg msg;
683 	struct dst_entry *dst;
684 	struct ipcm6_cookie ipc6;
685 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
686 	bool acast;
687 
688 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
689 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
690 		return;
691 
692 	saddr = &ipv6_hdr(skb)->daddr;
693 
694 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
695 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
696 		return;
697 
698 	if (!ipv6_unicast_destination(skb) &&
699 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
700 		saddr = NULL;
701 
702 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
703 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
704 
705 	memset(&fl6, 0, sizeof(fl6));
706 	fl6.flowi6_proto = IPPROTO_ICMPV6;
707 	fl6.daddr = ipv6_hdr(skb)->saddr;
708 	if (saddr)
709 		fl6.saddr = *saddr;
710 	fl6.flowi6_oif = icmp6_iif(skb);
711 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
712 	fl6.flowi6_mark = mark;
713 	fl6.flowi6_uid = sock_net_uid(net, NULL);
714 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
715 
716 	local_bh_disable();
717 	sk = icmpv6_xmit_lock(net);
718 	if (!sk)
719 		goto out_bh_enable;
720 	sk->sk_mark = mark;
721 	np = inet6_sk(sk);
722 
723 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
724 		fl6.flowi6_oif = np->mcast_oif;
725 	else if (!fl6.flowi6_oif)
726 		fl6.flowi6_oif = np->ucast_oif;
727 
728 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
729 		goto out;
730 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
731 	if (IS_ERR(dst))
732 		goto out;
733 
734 	/* Check the ratelimit */
735 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
736 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
737 		goto out_dst_release;
738 
739 	idev = __in6_dev_get(skb->dev);
740 
741 	msg.skb = skb;
742 	msg.offset = 0;
743 	msg.type = ICMPV6_ECHO_REPLY;
744 
745 	ipcm6_init_sk(&ipc6, np);
746 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
747 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
748 
749 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
750 			    skb->len + sizeof(struct icmp6hdr),
751 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
752 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
753 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
754 		ip6_flush_pending_frames(sk);
755 	} else {
756 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
757 					   skb->len + sizeof(struct icmp6hdr));
758 	}
759 out_dst_release:
760 	dst_release(dst);
761 out:
762 	icmpv6_xmit_unlock(sk);
763 out_bh_enable:
764 	local_bh_enable();
765 }
766 
767 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
768 {
769 	const struct inet6_protocol *ipprot;
770 	int inner_offset;
771 	__be16 frag_off;
772 	u8 nexthdr;
773 	struct net *net = dev_net(skb->dev);
774 
775 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
776 		goto out;
777 
778 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
779 	if (ipv6_ext_hdr(nexthdr)) {
780 		/* now skip over extension headers */
781 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
782 						&nexthdr, &frag_off);
783 		if (inner_offset < 0)
784 			goto out;
785 	} else {
786 		inner_offset = sizeof(struct ipv6hdr);
787 	}
788 
789 	/* Checkin header including 8 bytes of inner protocol header. */
790 	if (!pskb_may_pull(skb, inner_offset+8))
791 		goto out;
792 
793 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
794 	   Without this we will not able f.e. to make source routed
795 	   pmtu discovery.
796 	   Corresponding argument (opt) to notifiers is already added.
797 	   --ANK (980726)
798 	 */
799 
800 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
801 	if (ipprot && ipprot->err_handler)
802 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
803 
804 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
805 	return;
806 
807 out:
808 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
809 }
810 
811 /*
812  *	Handle icmp messages
813  */
814 
815 static int icmpv6_rcv(struct sk_buff *skb)
816 {
817 	struct net *net = dev_net(skb->dev);
818 	struct net_device *dev = icmp6_dev(skb);
819 	struct inet6_dev *idev = __in6_dev_get(dev);
820 	const struct in6_addr *saddr, *daddr;
821 	struct icmp6hdr *hdr;
822 	u8 type;
823 	bool success = false;
824 
825 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
826 		struct sec_path *sp = skb_sec_path(skb);
827 		int nh;
828 
829 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
830 				 XFRM_STATE_ICMP))
831 			goto drop_no_count;
832 
833 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
834 			goto drop_no_count;
835 
836 		nh = skb_network_offset(skb);
837 		skb_set_network_header(skb, sizeof(*hdr));
838 
839 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
840 			goto drop_no_count;
841 
842 		skb_set_network_header(skb, nh);
843 	}
844 
845 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
846 
847 	saddr = &ipv6_hdr(skb)->saddr;
848 	daddr = &ipv6_hdr(skb)->daddr;
849 
850 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
851 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
852 				    saddr, daddr);
853 		goto csum_error;
854 	}
855 
856 	if (!pskb_pull(skb, sizeof(*hdr)))
857 		goto discard_it;
858 
859 	hdr = icmp6_hdr(skb);
860 
861 	type = hdr->icmp6_type;
862 
863 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
864 
865 	switch (type) {
866 	case ICMPV6_ECHO_REQUEST:
867 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
868 			icmpv6_echo_reply(skb);
869 		break;
870 
871 	case ICMPV6_ECHO_REPLY:
872 		success = ping_rcv(skb);
873 		break;
874 
875 	case ICMPV6_PKT_TOOBIG:
876 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
877 		   standard destination cache. Seems, only "advanced"
878 		   destination cache will allow to solve this problem
879 		   --ANK (980726)
880 		 */
881 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
882 			goto discard_it;
883 		hdr = icmp6_hdr(skb);
884 
885 		/* to notify */
886 		/* fall through */
887 	case ICMPV6_DEST_UNREACH:
888 	case ICMPV6_TIME_EXCEED:
889 	case ICMPV6_PARAMPROB:
890 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
891 		break;
892 
893 	case NDISC_ROUTER_SOLICITATION:
894 	case NDISC_ROUTER_ADVERTISEMENT:
895 	case NDISC_NEIGHBOUR_SOLICITATION:
896 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
897 	case NDISC_REDIRECT:
898 		ndisc_rcv(skb);
899 		break;
900 
901 	case ICMPV6_MGM_QUERY:
902 		igmp6_event_query(skb);
903 		break;
904 
905 	case ICMPV6_MGM_REPORT:
906 		igmp6_event_report(skb);
907 		break;
908 
909 	case ICMPV6_MGM_REDUCTION:
910 	case ICMPV6_NI_QUERY:
911 	case ICMPV6_NI_REPLY:
912 	case ICMPV6_MLD2_REPORT:
913 	case ICMPV6_DHAAD_REQUEST:
914 	case ICMPV6_DHAAD_REPLY:
915 	case ICMPV6_MOBILE_PREFIX_SOL:
916 	case ICMPV6_MOBILE_PREFIX_ADV:
917 		break;
918 
919 	default:
920 		/* informational */
921 		if (type & ICMPV6_INFOMSG_MASK)
922 			break;
923 
924 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
925 				    saddr, daddr);
926 
927 		/*
928 		 * error of unknown type.
929 		 * must pass to upper level
930 		 */
931 
932 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
933 	}
934 
935 	/* until the v6 path can be better sorted assume failure and
936 	 * preserve the status quo behaviour for the rest of the paths to here
937 	 */
938 	if (success)
939 		consume_skb(skb);
940 	else
941 		kfree_skb(skb);
942 
943 	return 0;
944 
945 csum_error:
946 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
947 discard_it:
948 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
949 drop_no_count:
950 	kfree_skb(skb);
951 	return 0;
952 }
953 
954 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
955 		      u8 type,
956 		      const struct in6_addr *saddr,
957 		      const struct in6_addr *daddr,
958 		      int oif)
959 {
960 	memset(fl6, 0, sizeof(*fl6));
961 	fl6->saddr = *saddr;
962 	fl6->daddr = *daddr;
963 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
964 	fl6->fl6_icmp_type	= type;
965 	fl6->fl6_icmp_code	= 0;
966 	fl6->flowi6_oif		= oif;
967 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
968 }
969 
970 static void __net_exit icmpv6_sk_exit(struct net *net)
971 {
972 	int i;
973 
974 	for_each_possible_cpu(i)
975 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
976 	free_percpu(net->ipv6.icmp_sk);
977 }
978 
979 static int __net_init icmpv6_sk_init(struct net *net)
980 {
981 	struct sock *sk;
982 	int err, i;
983 
984 	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
985 	if (!net->ipv6.icmp_sk)
986 		return -ENOMEM;
987 
988 	for_each_possible_cpu(i) {
989 		err = inet_ctl_sock_create(&sk, PF_INET6,
990 					   SOCK_RAW, IPPROTO_ICMPV6, net);
991 		if (err < 0) {
992 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
993 			       err);
994 			goto fail;
995 		}
996 
997 		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
998 
999 		/* Enough space for 2 64K ICMP packets, including
1000 		 * sk_buff struct overhead.
1001 		 */
1002 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1003 	}
1004 	return 0;
1005 
1006  fail:
1007 	icmpv6_sk_exit(net);
1008 	return err;
1009 }
1010 
1011 static struct pernet_operations icmpv6_sk_ops = {
1012 	.init = icmpv6_sk_init,
1013 	.exit = icmpv6_sk_exit,
1014 };
1015 
1016 int __init icmpv6_init(void)
1017 {
1018 	int err;
1019 
1020 	err = register_pernet_subsys(&icmpv6_sk_ops);
1021 	if (err < 0)
1022 		return err;
1023 
1024 	err = -EAGAIN;
1025 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1026 		goto fail;
1027 
1028 	err = inet6_register_icmp_sender(icmp6_send);
1029 	if (err)
1030 		goto sender_reg_err;
1031 	return 0;
1032 
1033 sender_reg_err:
1034 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1035 fail:
1036 	pr_err("Failed to register ICMP6 protocol\n");
1037 	unregister_pernet_subsys(&icmpv6_sk_ops);
1038 	return err;
1039 }
1040 
1041 void icmpv6_cleanup(void)
1042 {
1043 	inet6_unregister_icmp_sender(icmp6_send);
1044 	unregister_pernet_subsys(&icmpv6_sk_ops);
1045 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1046 }
1047 
1048 
/*
 * Destination Unreachable translation table, indexed by ICMPv6 code:
 * the errno to report and whether the error is fatal.
 * Consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1082 
1083 int icmpv6_err_convert(u8 type, u8 code, int *err)
1084 {
1085 	int fatal = 0;
1086 
1087 	*err = EPROTO;
1088 
1089 	switch (type) {
1090 	case ICMPV6_DEST_UNREACH:
1091 		fatal = 1;
1092 		if (code < ARRAY_SIZE(tab_unreach)) {
1093 			*err  = tab_unreach[code].err;
1094 			fatal = tab_unreach[code].fatal;
1095 		}
1096 		break;
1097 
1098 	case ICMPV6_PKT_TOOBIG:
1099 		*err = EMSGSIZE;
1100 		break;
1101 
1102 	case ICMPV6_PARAMPROB:
1103 		*err = EPROTO;
1104 		fatal = 1;
1105 		break;
1106 
1107 	case ICMPV6_TIME_EXCEED:
1108 		*err = EHOSTUNREACH;
1109 		break;
1110 	}
1111 
1112 	return fatal;
1113 }
1114 EXPORT_SYMBOL(icmpv6_err_convert);
1115 
1116 #ifdef CONFIG_SYSCTL
1117 static struct ctl_table ipv6_icmp_table_template[] = {
1118 	{
1119 		.procname	= "ratelimit",
1120 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1121 		.maxlen		= sizeof(int),
1122 		.mode		= 0644,
1123 		.proc_handler	= proc_dointvec_ms_jiffies,
1124 	},
1125 	{
1126 		.procname	= "echo_ignore_all",
1127 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1128 		.maxlen		= sizeof(int),
1129 		.mode		= 0644,
1130 		.proc_handler = proc_dointvec,
1131 	},
1132 	{
1133 		.procname	= "echo_ignore_multicast",
1134 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1135 		.maxlen		= sizeof(int),
1136 		.mode		= 0644,
1137 		.proc_handler = proc_dointvec,
1138 	},
1139 	{
1140 		.procname	= "echo_ignore_anycast",
1141 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1142 		.maxlen		= sizeof(int),
1143 		.mode		= 0644,
1144 		.proc_handler = proc_dointvec,
1145 	},
1146 	{
1147 		.procname	= "ratemask",
1148 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1149 		.maxlen		= ICMPV6_MSG_MAX + 1,
1150 		.mode		= 0644,
1151 		.proc_handler = proc_do_large_bitmap,
1152 	},
1153 	{ },
1154 };
1155 
1156 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1157 {
1158 	struct ctl_table *table;
1159 
1160 	table = kmemdup(ipv6_icmp_table_template,
1161 			sizeof(ipv6_icmp_table_template),
1162 			GFP_KERNEL);
1163 
1164 	if (table) {
1165 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1166 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1167 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1168 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1169 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1170 	}
1171 	return table;
1172 }
1173 #endif
1174