xref: /openbmc/linux/net/ipv6/icmp.c (revision 23aebdac)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to send parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
/* Return this CPU's per-netns ICMPv6 control socket (array indexed by
 * CPU id, allocated in icmpv6_sk_init()). Callers run with BHs
 * disabled so they cannot migrate while the socket is in use.
 */
static inline struct sock *icmpv6_sk(struct net *net)
{
	return net->ipv6.icmp_sk[smp_processor_id()];
}
86 
/* Error handler for IPPROTO_ICMPV6: invoked by icmpv6_notify() when an
 * ICMPv6 error quotes an ICMPv6 packet we sent. @offset points at the
 * inner (quoted) ICMPv6 header inside @skb.
 */
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* Only error messages need further dispatch; if the quoted packet
	 * was an echo request, let the ping socket layer see the error.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));
}
104 
static int icmpv6_rcv(struct sk_buff *skb);

/* Registration record for IPPROTO_ICMPV6: NOPOLICY because the XFRM
 * policy check is done by hand in icmpv6_rcv(), FINAL because no
 * further protocol demux follows ICMPv6.
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
112 
113 /* Called with BH disabled */
/* Take this CPU's ICMPv6 socket lock; returns NULL instead of spinning
 * so we never deadlock against ourselves when sending an ICMP triggers
 * another ICMP on the same CPU.
 */
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = icmpv6_sk(net);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	return sk;
}
128 
/* Release the lock taken by icmpv6_xmit_lock(). */
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock(&sk->sk_lock.slock);
}
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
/* Return true when we must NOT answer this packet with an ICMPv6 error:
 * the packet is truncated inside its extension headers, or it is itself
 * an ICMPv6 error message (replying would risk error storms).
 */
static bool is_ineligible(const struct sk_buff *skb)
{
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		/* Errors may be answered only for informational messages;
		 * an unreadable type byte is treated as an error message.
		 */
		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}
168 
/* Types that are exempt from rate limiting: informational messages,
 * and Packet Too Big — throttling the latter would break path MTU
 * discovery.
 */
static bool icmpv6_mask_allow(int type)
{
	return (type & ICMPV6_INFOMSG_MASK) != 0 ||
	       type == ICMPV6_PKT_TOOBIG;
}
181 
/* Combine the per-type exemption with the host-wide ICMP token bucket
 * (sysctl_icmp_msgs_per_sec, shared with IPv4).
 */
static bool icmpv6_global_allow(int type)
{
	return icmpv6_mask_allow(type) || icmp_global_allow();
}
192 
193 /*
194  * Check the ICMP output rate limit
195  */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	/* Informational messages and PKT_TOOBIG are never limited. */
	if (icmpv6_mask_allow(type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* Per-destination token bucket, keyed by inet_peer. */
		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	dst_release(dst);
	return res;
}
234 
235 /*
236  *	an inline helper for the "simple" if statement below
237  *	checks if parameter problem report is caused by an
238  *	unrecognized IPv6 option that has the Option Type
239  *	highest-order two bits set to 10
240  */
241 
242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
243 {
244 	u8 _optval, *op;
245 
246 	offset += skb_network_offset(skb);
247 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
248 	if (!op)
249 		return true;
250 	return (*op & 0xC0) == 0x80;
251 }
252 
/*
 * Write the ICMPv6 header @thdr in front of the data that was queued
 * on @sk->sk_write_queue by ip6_append_data(), compute the checksum
 * over the pseudo-header (@len is header + payload length) and push
 * the pending frames out. Returns 0 (kept for API symmetry).
 */
int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
			       struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;
	int err = 0;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		goto out;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single frame: fold the header into its checksum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Fragmented: sum the per-skb checksums first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
out:
	return err;
}
293 
/* Context handed to icmpv6_getfrag() while building an outgoing
 * ICMPv6 message from an existing packet.
 */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet quoted in the ICMP body */
	int		offset;	/* where in skb the quoted data starts */
	uint8_t		type;	/* outgoing icmp6_type */
};
299 
/*
 * ip6_append_data() getfrag callback: copy @len bytes of the original
 * packet (starting at msg->offset + @offset) into the new frame while
 * folding the copied bytes into its checksum. For error messages the
 * conntrack entry of the offending packet is attached as well.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum = 0;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len, csum);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
313 
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: if the offending packet carried a Home Address
 * destination option, swap it with the IPv6 source address so the
 * ICMPv6 error is routed back to the mobile node's home address.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
337 
/*
 * Route the outgoing ICMPv6 error described by @fl6, applying XFRM
 * policy. If the straightforward lookup is rejected by policy
 * (-EPERM), retry with a flow decoded in reverse from the offending
 * packet so errors about IPsec traffic can travel inside the same SA.
 * Returns a referenced dst_entry or an ERR_PTR().
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;	/* transformed route: use it */
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;	/* policy rejected: try reverse flow */
		else
			return dst;
	}

	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Fall back to the original (untransformed) route if we kept one. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
401 
402 /*
403  *	Send an ICMP message in response to a packet in error
404  */
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		       const struct in6_addr *force_saddr)
{
	struct net *net = dev_net(skb->dev);
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct sockcm_cookie sockc_unused = {0};
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	int err = 0;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);

	/* Sanity: the IPv6 header must lie fully inside the skb data. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	/* If the packet was addressed to one of our (any)cast addresses,
	 * reply from that address.
	 */
	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = skb->dev->ifindex;

		/* for local packets, get the real device index */
		if (iif == LOOPBACK_IFINDEX) {
			dst = skb_dst(skb);
			if (dst) {
				struct rt6_info *rt;

				rt = container_of(dst, struct rt6_info, dst);
				iif = rt->rt6i_idev->dev->ifindex;
			}
		}
	} else {
		dst = skb_dst(skb);
		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
		goto out_bh_enable;

	mip6_addr_swap(skb);

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	sk->sk_mark = mark;
	np = inet6_sk(sk);

	/* Per-destination rate limit on top of the global one. */
	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipc6.tclass = np->tclass;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.dontfrag = np->dontfrag;
	ipc6.opt = NULL;

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Quote as much of the offending packet as fits in the minimum
	 * MTU (RFC 4443: error messages must not exceed 1280 bytes).
	 */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
			      len + sizeof(struct icmp6hdr),
			      sizeof(struct icmp6hdr),
			      &ipc6, &fl6, (struct rt6_info *)dst,
			      MSG_DONTWAIT, &sockc_unused);
	if (err) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
591 
/* Slightly more convenient version of icmp6_send: emit a Parameter
 * Problem pointing at offset @pos, then consume the offending skb
 * (callers hand over ownership).
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}
599 
600 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
601  * if sufficient data bytes are available
602  * @nhs is the size of the tunnel header(s) :
603  *  Either an IPv4 header for SIT encap
604  *         an IPv4 header + GRE header for GRE encap
605  */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	/* Need the tunnel header plus the inner IPv6 header and 8 bytes
	 * of its payload to build a meaningful error.
	 */
	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* skb_copy only when we will rewrite the payload for RFC 4884
	 * padding; a clone suffices otherwise.
	 */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Use the v4-mapped form of the outer IPv4 source as the ICMPv6
	 * source so the origin of the error stays identifiable.
	 */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr);
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr);
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
664 
/* Answer an ICMPv6 echo request: mirror the request's identifier,
 * sequence number and payload back to the sender as an echo reply.
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	int err = 0;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	struct sockcm_cookie sockc_unused = {0};

	saddr = &ipv6_hdr(skb)->daddr;

	/* Only source the reply from the request's destination address
	 * when that was unicast to us, or anycast with the sysctl
	 * explicitly allowing anycast-sourced replies.
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
	      ipv6_anycast_destination(skb_dst(skb), saddr)))
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = skb->dev->ifindex;
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	sk->sk_mark = mark;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	err = ip6_dst_lookup(net, sk, &dst, &fl6);
	if (err)
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = ICMPV6_ECHO_REPLY;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.dontfrag = np->dontfrag;
	ipc6.opt = NULL;

	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
				sizeof(struct icmp6hdr), &ipc6, &fl6,
				(struct rt6_info *)dst, MSG_DONTWAIT,
				&sockc_unused);

	if (err) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
						 skb->len + sizeof(struct icmp6hdr));
	}
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
751 
/*
 * Deliver an incoming ICMPv6 error to the protocol that triggered it:
 * walk past the quoted inner IPv6 header and its extension headers,
 * then invoke the matching inet6 protocol err_handler and notify any
 * listening raw sockets.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
795 
796 /*
797  *	Handle icmp messages
798  */
799 
static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

	/* Manual XFRM policy check (the protocol is registered with
	 * INET6_PROTO_NOPOLICY): only ICMP-aware states may carry ICMP,
	 * and the quoted inner header must also pass a reverse check.
	 */
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/*
		 *	Drop through to notify
		 */

	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		/* Known types handled elsewhere (or intentionally ignored). */
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}
938 
939 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
940 		      u8 type,
941 		      const struct in6_addr *saddr,
942 		      const struct in6_addr *daddr,
943 		      int oif)
944 {
945 	memset(fl6, 0, sizeof(*fl6));
946 	fl6->saddr = *saddr;
947 	fl6->daddr = *daddr;
948 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
949 	fl6->fl6_icmp_type	= type;
950 	fl6->fl6_icmp_code	= 0;
951 	fl6->flowi6_oif		= oif;
952 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
953 }
954 
/*
 * Per-namespace init: create one ICMPv6 control socket per possible
 * CPU; these are used to send kernel-originated ICMPv6 messages.
 * Returns 0 or a negative errno (sockets created so far are destroyed
 * on failure).
 */
static int __net_init icmpv6_sk_init(struct net *net)
{
	struct sock *sk;
	int err, i, j;

	net->ipv6.icmp_sk =
		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
	if (!net->ipv6.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			goto fail;
		}

		net->ipv6.icmp_sk[i] = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}
	return 0;

 fail:
	/* NOTE(review): this unwind walks ids 0..i-1 densely while the
	 * creation loop used for_each_possible_cpu; if the possible mask
	 * had holes, slots in that range stay NULL (kzalloc) — confirm
	 * inet_ctl_sock_destroy tolerates a NULL socket on this config.
	 */
	for (j = 0; j < i; j++)
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
	kfree(net->ipv6.icmp_sk);
	return err;
}
989 
990 static void __net_exit icmpv6_sk_exit(struct net *net)
991 {
992 	int i;
993 
994 	for_each_possible_cpu(i) {
995 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
996 	}
997 	kfree(net->ipv6.icmp_sk);
998 }
999 
/* Hooks that create/destroy the per-namespace ICMPv6 sockets. */
static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};
1004 
/*
 * Module init: register the per-net socket ops, hook IPPROTO_ICMPV6
 * into the inet6 protocol table, and register icmp6_send as the
 * ICMPv6 sender used by the rest of the stack. Each failure path
 * unwinds the registrations made before it.
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}
1029 
/* Unregister everything set up by icmpv6_init(). */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1036 
1037 
/* Translation table for ICMPV6_DEST_UNREACH codes: indexed by code,
 * maps each to a socket errno and whether the error is fatal for the
 * connection (consumed by icmpv6_err_convert()).
 */
static const struct icmp6_err {
	int err;	/* errno reported to the socket */
	int fatal;	/* non-zero if the error kills the connection */
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1071 
1072 int icmpv6_err_convert(u8 type, u8 code, int *err)
1073 {
1074 	int fatal = 0;
1075 
1076 	*err = EPROTO;
1077 
1078 	switch (type) {
1079 	case ICMPV6_DEST_UNREACH:
1080 		fatal = 1;
1081 		if (code < ARRAY_SIZE(tab_unreach)) {
1082 			*err  = tab_unreach[code].err;
1083 			fatal = tab_unreach[code].fatal;
1084 		}
1085 		break;
1086 
1087 	case ICMPV6_PKT_TOOBIG:
1088 		*err = EMSGSIZE;
1089 		break;
1090 
1091 	case ICMPV6_PARAMPROB:
1092 		*err = EPROTO;
1093 		fatal = 1;
1094 		break;
1095 
1096 	case ICMPV6_TIME_EXCEED:
1097 		*err = EHOSTUNREACH;
1098 		break;
1099 	}
1100 
1101 	return fatal;
1102 }
1103 EXPORT_SYMBOL(icmpv6_err_convert);
1104 
1105 #ifdef CONFIG_SYSCTL
/* Template for the per-namespace ICMPv6 sysctl table; the "ratelimit"
 * entry's data pointer is rebound to each namespace's own icmpv6_time
 * in ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },	/* sentinel */
};
1116 
1117 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1118 {
1119 	struct ctl_table *table;
1120 
1121 	table = kmemdup(ipv6_icmp_table_template,
1122 			sizeof(ipv6_icmp_table_template),
1123 			GFP_KERNEL);
1124 
1125 	if (table)
1126 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1127 
1128 	return table;
1129 }
1130 #endif
1131