xref: /openbmc/linux/net/ipv6/icmp.c (revision 5fdaa88d)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to send parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
/* err_handler for IPPROTO_ICMPV6: called when an inbound ICMPv6 error
 * refers to an ICMPv6 packet we originated (e.g. an echo request).
 */
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	/* PMTU and redirect handling update the routing caches. */
	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* Errors (not info messages) about our echo requests are handed
	 * to the ping socket layer.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));
}
104 
105 static int icmpv6_rcv(struct sk_buff *skb);
106 
/* Protocol hooks for IPPROTO_ICMPV6: input handler and error handler.
 * NOPOLICY/FINAL: ICMPv6 is delivered without an XFRM policy check here
 * (icmpv6_rcv performs its own) and terminates protocol resubmission.
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
112 
113 /* Called with BH disabled */
114 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
115 {
116 	struct sock *sk;
117 
118 	sk = icmpv6_sk(net);
119 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 		/* This can happen if the output path (f.e. SIT or
121 		 * ip6ip6 tunnel) signals dst_link_failure() for an
122 		 * outgoing ICMP6 packet.
123 		 */
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
/* Release the per-CPU ICMPv6 socket taken by icmpv6_xmit_lock(). */
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock(&sk->sk_lock.slock);
}
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
static bool is_ineligible(const struct sk_buff *skb)
{
	/* Offset of the payload right after the fixed IPv6 header. */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	/* Truncated before the payload even begins: do not reply. */
	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		/* Unreadable type byte, or an ICMPv6 error message:
		 * never answer an error with an error.
		 */
		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}
168 
169 static bool icmpv6_mask_allow(int type)
170 {
171 	/* Informational messages are not limited. */
172 	if (type & ICMPV6_INFOMSG_MASK)
173 		return true;
174 
175 	/* Do not limit pmtu discovery, it would break it. */
176 	if (type == ICMPV6_PKT_TOOBIG)
177 		return true;
178 
179 	return false;
180 }
181 
182 static bool icmpv6_global_allow(int type)
183 {
184 	if (icmpv6_mask_allow(type))
185 		return true;
186 
187 	if (icmp_global_allow())
188 		return true;
189 
190 	return false;
191 }
192 
193 /*
194  * Check the ICMP output rate limit
195  */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	/* Exempt types (info messages, PKT_TOOBIG) are never limited. */
	if (icmpv6_mask_allow(type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		/* Loopback traffic is not rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* Per-destination token bucket via the inet_peer cache. */
		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	dst_release(dst);
	return res;
}
234 
235 /*
236  *	an inline helper for the "simple" if statement below
237  *	checks if parameter problem report is caused by an
238  *	unrecognized IPv6 option that has the Option Type
239  *	highest-order two bits set to 10
240  */
241 
242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
243 {
244 	u8 _optval, *op;
245 
246 	offset += skb_network_offset(skb);
247 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
248 	if (!op)
249 		return true;
250 	return (*op & 0xC0) == 0x80;
251 }
252 
/* Finalize and transmit the ICMPv6 message queued on @sk's write queue:
 * write the real header @thdr into the first fragment, compute the
 * ICMPv6 checksum over @len payload bytes, and push the frames out.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	/* The header template was appended with cksum 0; install the
	 * real type/code/etc. before checksumming.
	 */
	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single fragment: fold the header into its csum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: sum the per-skb checksums first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}
290 
/* getfrag context: the packet in error plus where to copy from. */
struct icmpv6_msg {
	struct sk_buff	*skb;		/* packet that triggered the message */
	int		offset;		/* byte offset in skb to copy from */
	uint8_t		type;		/* outgoing ICMPv6 type */
};
296 
297 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
298 {
299 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
300 	struct sk_buff *org_skb = msg->skb;
301 	__wsum csum = 0;
302 
303 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
304 				      to, len, csum);
305 	skb->csum = csum_block_add(skb->csum, csum, odd);
306 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
307 		nf_ct_attach(skb, org_skb);
308 	return 0;
309 }
310 
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: if the offending packet carried a Home Address
 * destination option, swap the (care-of) source address with the home
 * address so the error is addressed to the mobile node's home address.
 */
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	/* dsthao records the offset of a dest-opts header seen on input. */
	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
334 
/* Route an outgoing ICMPv6 error, applying XFRM policy.  If plain
 * policy lookup is denied (-EPERM), retry with a flow decoded from the
 * offending packet in reverse (ICMP-relookup), per the xfrm ICMP rules.
 * Returns a held dst or an ERR_PTR.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		/* xfrm transformed the dst: use it as-is. */
		if (dst != dst2)
			return dst;
	} else {
		/* -EPERM: fall through to the ICMP relookup below;
		 * any other error is final.
		 */
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	/* Build the reverse flow of the offending packet and retry with
	 * XFRM_LOOKUP_ICMP, which permits ICMP-about-IPsec traffic.
	 */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
		/* Falls through to relookup_failed, which returns dst. */
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* dst is non-NULL only if the first lookup succeeded unchanged
	 * or the relookup replaced it; otherwise report the error.
	 */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
398 
399 static int icmp6_iif(const struct sk_buff *skb)
400 {
401 	int iif = skb->dev->ifindex;
402 
403 	/* for local traffic to local address, skb dev is the loopback
404 	 * device. Check if there is a dst attached to the skb and if so
405 	 * get the real device index.
406 	 */
407 	if (unlikely(iif == LOOPBACK_IFINDEX)) {
408 		const struct rt6_info *rt6 = skb_rt6_info(skb);
409 
410 		if (rt6)
411 			iif = rt6->rt6i_idev->dev->ifindex;
412 	}
413 
414 	return iif;
415 }
416 
417 /*
418  *	Send an ICMP message in response to a packet in error
419  */
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		       const struct in6_addr *force_saddr)
{
	struct net *net = dev_net(skb->dev);
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);

	/* The IPv6 header must lie fully inside the skb's linear data. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	/* If the packet was addressed to us (unicast or our anycast),
	 * prefer its destination as our reply source address.
	 */
	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	/* Replies to multicast / non-host packets are only allowed for
	 * PKT_TOOBIG and for unrecognized-option parameter problems
	 * (RFC 4443 2.4(e) exceptions).
	 */
	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		dst = skb_dst(skb);
		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
		goto out_bh_enable;

	mip6_addr_swap(skb);

	/* Build the reply flow back toward the offender's source. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	sk->sk_mark = mark;
	np = inet6_sk(sk);

	/* Per-destination rate limit. */
	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipcm6_init_sk(&ipc6, np);
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Quote as much of the offending packet as fits in the minimum
	 * IPv6 MTU, so the error never needs fragmentation.
	 */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, (struct rt6_info *)dst,
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
590 
/* Slightly more convenient version of icmp6_send: emit a Parameter
 * Problem with pointer @pos and consume @skb.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}
598 
599 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
600  * if sufficient data bytes are available
601  * @nhs is the size of the tunnel header(s) :
602  *  Either an IPv4 header for SIT encap
603  *         an IPv4 header + GRE header for GRE encap
604  */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	/* Need the tunnel header, the inner IPv6 header and 8 bytes of
	 * the inner payload to build a meaningful error.
	 */
	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* Need a writable copy when we will shift data for RFC 4884. */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	/* Strip the tunnel header so skb2 starts at the inner IPv6. */
	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* The outer (IPv4) source becomes a v4-mapped IPv6 source. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr);
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr);
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
663 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
664 
/* Answer an Echo Request with an Echo Reply, mirroring the request's
 * payload and (where permitted) its destination as our source address.
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);

	saddr = &ipv6_hdr(skb)->daddr;

	/* Only echo our own address back if the request was unicast to
	 * us, or anycast with anycast_src_echo_reply enabled; otherwise
	 * let source address selection pick one.
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
	      ipv6_anycast_destination(skb_dst(skb), saddr)))
		saddr = NULL;

	/* Reply header: same id/seq, only the type changes. */
	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	sk->sk_mark = mark;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	/* xfrm_lookup() consumes dst on error, so no release needed. */
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = ICMPV6_ECHO_REPLY;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
	}
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
745 
/* Deliver an inbound ICMPv6 error to the upper-layer protocol named by
 * the quoted packet's final next-header, and to matching raw sockets.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
789 
790 /*
791  *	Handle icmp messages
792  */
793 
static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Only ICMP flagged SAs may carry ICMP past a failed
		 * policy check; everything else is dropped silently.
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the quoted
		 * inner packet so the reverse policy check sees it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		/* pskb_may_pull may have reallocated the header. */
		hdr = icmp6_hdr(skb);

		/* to notify */
		/* fall through */
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}
930 
931 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
932 		      u8 type,
933 		      const struct in6_addr *saddr,
934 		      const struct in6_addr *daddr,
935 		      int oif)
936 {
937 	memset(fl6, 0, sizeof(*fl6));
938 	fl6->saddr = *saddr;
939 	fl6->daddr = *daddr;
940 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
941 	fl6->fl6_icmp_type	= type;
942 	fl6->fl6_icmp_code	= 0;
943 	fl6->flowi6_oif		= oif;
944 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
945 }
946 
947 static int __net_init icmpv6_sk_init(struct net *net)
948 {
949 	struct sock *sk;
950 	int err, i, j;
951 
952 	net->ipv6.icmp_sk =
953 		kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
954 	if (!net->ipv6.icmp_sk)
955 		return -ENOMEM;
956 
957 	for_each_possible_cpu(i) {
958 		err = inet_ctl_sock_create(&sk, PF_INET6,
959 					   SOCK_RAW, IPPROTO_ICMPV6, net);
960 		if (err < 0) {
961 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
962 			       err);
963 			goto fail;
964 		}
965 
966 		net->ipv6.icmp_sk[i] = sk;
967 
968 		/* Enough space for 2 64K ICMP packets, including
969 		 * sk_buff struct overhead.
970 		 */
971 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
972 	}
973 	return 0;
974 
975  fail:
976 	for (j = 0; j < i; j++)
977 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
978 	kfree(net->ipv6.icmp_sk);
979 	return err;
980 }
981 
/* Per-netns teardown: destroy the per-CPU control sockets created by
 * icmpv6_sk_init() and free the pointer array.
 */
static void __net_exit icmpv6_sk_exit(struct net *net)
{
	int i;

	for_each_possible_cpu(i) {
		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
	}
	kfree(net->ipv6.icmp_sk);
}
991 
/* Per-network-namespace lifecycle hooks for the ICMPv6 sockets. */
static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};
996 
/* Module init: register the pernet socket ops, the IPPROTO_ICMPV6
 * handler, and the ICMPv6 sender; unwinds in reverse order on failure.
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}
1021 
/* Module teardown: mirror of icmpv6_init() in reverse order. */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1028 
1029 
/* Maps ICMPV6_DEST_UNREACH codes (used as the array index) to the errno
 * reported to the socket and whether the error is fatal (RFC 4443 3.1).
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1063 
1064 int icmpv6_err_convert(u8 type, u8 code, int *err)
1065 {
1066 	int fatal = 0;
1067 
1068 	*err = EPROTO;
1069 
1070 	switch (type) {
1071 	case ICMPV6_DEST_UNREACH:
1072 		fatal = 1;
1073 		if (code < ARRAY_SIZE(tab_unreach)) {
1074 			*err  = tab_unreach[code].err;
1075 			fatal = tab_unreach[code].fatal;
1076 		}
1077 		break;
1078 
1079 	case ICMPV6_PKT_TOOBIG:
1080 		*err = EMSGSIZE;
1081 		break;
1082 
1083 	case ICMPV6_PARAMPROB:
1084 		*err = EPROTO;
1085 		fatal = 1;
1086 		break;
1087 
1088 	case ICMPV6_TIME_EXCEED:
1089 		*err = EHOSTUNREACH;
1090 		break;
1091 	}
1092 
1093 	return fatal;
1094 }
1095 EXPORT_SYMBOL(icmpv6_err_convert);
1096 
1097 #ifdef CONFIG_SYSCTL
/* Template for the per-netns "net.ipv6.icmp" sysctl directory; .data is
 * repointed at each namespace's value in ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },	/* sentinel */
};
1108 
1109 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1110 {
1111 	struct ctl_table *table;
1112 
1113 	table = kmemdup(ipv6_icmp_table_template,
1114 			sizeof(ipv6_icmp_table_template),
1115 			GFP_KERNEL);
1116 
1117 	if (table)
1118 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1119 
1120 	return table;
1121 }
1122 #endif
1123