xref: /openbmc/linux/net/ipv6/icmp.c (revision 4832c30d)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
 *	yoshfuji		:	ensure a parameter problem is sent for
 *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 }
104 
105 static int icmpv6_rcv(struct sk_buff *skb);
106 
/*
 * Receive and error handlers for ICMPv6; icmpv6_init() registers this
 * structure for IPPROTO_ICMPV6.
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
112 
113 /* Called with BH disabled */
114 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
115 {
116 	struct sock *sk;
117 
118 	sk = icmpv6_sk(net);
119 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 		/* This can happen if the output path (f.e. SIT or
121 		 * ip6ip6 tunnel) signals dst_link_failure() for an
122 		 * outgoing ICMP6 packet.
123 		 */
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
/* Release the socket lock taken by icmpv6_xmit_lock(). */
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock(&sk->sk_lock.slock);
}
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 	int len = skb->len - ptr;
149 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 	__be16 frag_off;
151 
152 	if (len < 0)
153 		return true;
154 
155 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 	if (ptr < 0)
157 		return false;
158 	if (nexthdr == IPPROTO_ICMPV6) {
159 		u8 _type, *tp;
160 		tp = skb_header_pointer(skb,
161 			ptr+offsetof(struct icmp6hdr, icmp6_type),
162 			sizeof(_type), &_type);
163 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164 			return true;
165 	}
166 	return false;
167 }
168 
/*
 * Tell whether @type is exempt from rate limiting.
 *
 * Informational messages are not limited, and neither is Packet Too Big,
 * because limiting it would break path MTU discovery.
 */
static bool icmpv6_mask_allow(int type)
{
	return (type & ICMPV6_INFOMSG_MASK) || type == ICMPV6_PKT_TOOBIG;
}
181 
182 static bool icmpv6_global_allow(int type)
183 {
184 	if (icmpv6_mask_allow(type))
185 		return true;
186 
187 	if (icmp_global_allow())
188 		return true;
189 
190 	return false;
191 }
192 
193 /*
194  * Check the ICMP output rate limit
195  */
196 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
197 			       struct flowi6 *fl6)
198 {
199 	struct net *net = sock_net(sk);
200 	struct dst_entry *dst;
201 	bool res = false;
202 
203 	if (icmpv6_mask_allow(type))
204 		return true;
205 
206 	/*
207 	 * Look up the output route.
208 	 * XXX: perhaps the expire for routing entries cloned by
209 	 * this lookup should be more aggressive (not longer than timeout).
210 	 */
211 	dst = ip6_route_output(net, sk, fl6);
212 	if (dst->error) {
213 		IP6_INC_STATS(net, ip6_dst_idev(dst),
214 			      IPSTATS_MIB_OUTNOROUTES);
215 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
216 		res = true;
217 	} else {
218 		struct rt6_info *rt = (struct rt6_info *)dst;
219 		int tmo = net->ipv6.sysctl.icmpv6_time;
220 		struct inet_peer *peer;
221 
222 		/* Give more bandwidth to wider prefixes. */
223 		if (rt->rt6i_dst.plen < 128)
224 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
225 
226 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
227 		res = inet_peer_xrlim_allow(peer, tmo);
228 		if (peer)
229 			inet_putpeer(peer);
230 	}
231 	dst_release(dst);
232 	return res;
233 }
234 
235 /*
236  *	an inline helper for the "simple" if statement below
237  *	checks if parameter problem report is caused by an
238  *	unrecognized IPv6 option that has the Option Type
239  *	highest-order two bits set to 10
240  */
241 
242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
243 {
244 	u8 _optval, *op;
245 
246 	offset += skb_network_offset(skb);
247 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
248 	if (!op)
249 		return true;
250 	return (*op & 0xC0) == 0x80;
251 }
252 
253 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
254 			       struct icmp6hdr *thdr, int len)
255 {
256 	struct sk_buff *skb;
257 	struct icmp6hdr *icmp6h;
258 	int err = 0;
259 
260 	skb = skb_peek(&sk->sk_write_queue);
261 	if (!skb)
262 		goto out;
263 
264 	icmp6h = icmp6_hdr(skb);
265 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
266 	icmp6h->icmp6_cksum = 0;
267 
268 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
269 		skb->csum = csum_partial(icmp6h,
270 					sizeof(struct icmp6hdr), skb->csum);
271 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
272 						      &fl6->daddr,
273 						      len, fl6->flowi6_proto,
274 						      skb->csum);
275 	} else {
276 		__wsum tmp_csum = 0;
277 
278 		skb_queue_walk(&sk->sk_write_queue, skb) {
279 			tmp_csum = csum_add(tmp_csum, skb->csum);
280 		}
281 
282 		tmp_csum = csum_partial(icmp6h,
283 					sizeof(struct icmp6hdr), tmp_csum);
284 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
285 						      &fl6->daddr,
286 						      len, fl6->flowi6_proto,
287 						      tmp_csum);
288 	}
289 	ip6_push_pending_frames(sk);
290 out:
291 	return err;
292 }
293 
/* Context passed to icmpv6_getfrag() as the @from argument of ip6_append_data(). */
struct icmpv6_msg {
	struct sk_buff	*skb;		/* packet whose bytes are copied into the message */
	int		offset;		/* offset within @skb at which copying starts */
	uint8_t		type;		/* ICMPv6 type of the message being built */
};
299 
300 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
301 {
302 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
303 	struct sk_buff *org_skb = msg->skb;
304 	__wsum csum = 0;
305 
306 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
307 				      to, len, csum);
308 	skb->csum = csum_block_add(skb->csum, csum, odd);
309 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
310 		nf_ct_attach(skb, org_skb);
311 	return 0;
312 }
313 
314 #if IS_ENABLED(CONFIG_IPV6_MIP6)
315 static void mip6_addr_swap(struct sk_buff *skb)
316 {
317 	struct ipv6hdr *iph = ipv6_hdr(skb);
318 	struct inet6_skb_parm *opt = IP6CB(skb);
319 	struct ipv6_destopt_hao *hao;
320 	struct in6_addr tmp;
321 	int off;
322 
323 	if (opt->dsthao) {
324 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
325 		if (likely(off >= 0)) {
326 			hao = (struct ipv6_destopt_hao *)
327 					(skb_network_header(skb) + off);
328 			tmp = iph->saddr;
329 			iph->saddr = hao->addr;
330 			hao->addr = tmp;
331 		}
332 	}
333 }
334 #else
335 static inline void mip6_addr_swap(struct sk_buff *skb) {}
336 #endif
337 
338 static struct dst_entry *icmpv6_route_lookup(struct net *net,
339 					     struct sk_buff *skb,
340 					     struct sock *sk,
341 					     struct flowi6 *fl6)
342 {
343 	struct dst_entry *dst, *dst2;
344 	struct flowi6 fl2;
345 	int err;
346 
347 	err = ip6_dst_lookup(net, sk, &dst, fl6);
348 	if (err)
349 		return ERR_PTR(err);
350 
351 	/*
352 	 * We won't send icmp if the destination is known
353 	 * anycast.
354 	 */
355 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
356 		net_dbg_ratelimited("icmp6_send: acast source\n");
357 		dst_release(dst);
358 		return ERR_PTR(-EINVAL);
359 	}
360 
361 	/* No need to clone since we're just using its address. */
362 	dst2 = dst;
363 
364 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
365 	if (!IS_ERR(dst)) {
366 		if (dst != dst2)
367 			return dst;
368 	} else {
369 		if (PTR_ERR(dst) == -EPERM)
370 			dst = NULL;
371 		else
372 			return dst;
373 	}
374 
375 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
376 	if (err)
377 		goto relookup_failed;
378 
379 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
380 	if (err)
381 		goto relookup_failed;
382 
383 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
384 	if (!IS_ERR(dst2)) {
385 		dst_release(dst);
386 		dst = dst2;
387 	} else {
388 		err = PTR_ERR(dst2);
389 		if (err == -EPERM) {
390 			dst_release(dst);
391 			return dst2;
392 		} else
393 			goto relookup_failed;
394 	}
395 
396 relookup_failed:
397 	if (dst)
398 		return dst;
399 	return ERR_PTR(err);
400 }
401 
402 /*
403  *	Send an ICMP message in response to a packet in error
404  */
405 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
406 		       const struct in6_addr *force_saddr)
407 {
408 	struct net *net = dev_net(skb->dev);
409 	struct inet6_dev *idev = NULL;
410 	struct ipv6hdr *hdr = ipv6_hdr(skb);
411 	struct sock *sk;
412 	struct ipv6_pinfo *np;
413 	const struct in6_addr *saddr = NULL;
414 	struct dst_entry *dst;
415 	struct icmp6hdr tmp_hdr;
416 	struct flowi6 fl6;
417 	struct icmpv6_msg msg;
418 	struct sockcm_cookie sockc_unused = {0};
419 	struct ipcm6_cookie ipc6;
420 	int iif = 0;
421 	int addr_type = 0;
422 	int len;
423 	int err = 0;
424 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
425 
426 	if ((u8 *)hdr < skb->head ||
427 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
428 		return;
429 
430 	/*
431 	 *	Make sure we respect the rules
432 	 *	i.e. RFC 1885 2.4(e)
433 	 *	Rule (e.1) is enforced by not using icmp6_send
434 	 *	in any code that processes icmp errors.
435 	 */
436 	addr_type = ipv6_addr_type(&hdr->daddr);
437 
438 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
439 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
440 		saddr = &hdr->daddr;
441 
442 	/*
443 	 *	Dest addr check
444 	 */
445 
446 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
447 		if (type != ICMPV6_PKT_TOOBIG &&
448 		    !(type == ICMPV6_PARAMPROB &&
449 		      code == ICMPV6_UNK_OPTION &&
450 		      (opt_unrec(skb, info))))
451 			return;
452 
453 		saddr = NULL;
454 	}
455 
456 	addr_type = ipv6_addr_type(&hdr->saddr);
457 
458 	/*
459 	 *	Source addr check
460 	 */
461 
462 	if (__ipv6_addr_needs_scope_id(addr_type)) {
463 		iif = skb->dev->ifindex;
464 
465 		/* for local packets, get the real device index */
466 		if (iif == LOOPBACK_IFINDEX) {
467 			dst = skb_dst(skb);
468 			if (dst) {
469 				struct rt6_info *rt;
470 
471 				rt = container_of(dst, struct rt6_info, dst);
472 				iif = rt->rt6i_idev->dev->ifindex;
473 			}
474 		}
475 	} else {
476 		dst = skb_dst(skb);
477 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
478 	}
479 
480 	/*
481 	 *	Must not send error if the source does not uniquely
482 	 *	identify a single node (RFC2463 Section 2.4).
483 	 *	We check unspecified / multicast addresses here,
484 	 *	and anycast addresses will be checked later.
485 	 */
486 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
487 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
488 				    &hdr->saddr, &hdr->daddr);
489 		return;
490 	}
491 
492 	/*
493 	 *	Never answer to a ICMP packet.
494 	 */
495 	if (is_ineligible(skb)) {
496 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
497 				    &hdr->saddr, &hdr->daddr);
498 		return;
499 	}
500 
501 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
502 	local_bh_disable();
503 
504 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
505 	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
506 		goto out_bh_enable;
507 
508 	mip6_addr_swap(skb);
509 
510 	memset(&fl6, 0, sizeof(fl6));
511 	fl6.flowi6_proto = IPPROTO_ICMPV6;
512 	fl6.daddr = hdr->saddr;
513 	if (force_saddr)
514 		saddr = force_saddr;
515 	if (saddr)
516 		fl6.saddr = *saddr;
517 	fl6.flowi6_mark = mark;
518 	fl6.flowi6_oif = iif;
519 	fl6.fl6_icmp_type = type;
520 	fl6.fl6_icmp_code = code;
521 	fl6.flowi6_uid = sock_net_uid(net, NULL);
522 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
523 
524 	sk = icmpv6_xmit_lock(net);
525 	if (!sk)
526 		goto out_bh_enable;
527 
528 	sk->sk_mark = mark;
529 	np = inet6_sk(sk);
530 
531 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
532 		goto out;
533 
534 	tmp_hdr.icmp6_type = type;
535 	tmp_hdr.icmp6_code = code;
536 	tmp_hdr.icmp6_cksum = 0;
537 	tmp_hdr.icmp6_pointer = htonl(info);
538 
539 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
540 		fl6.flowi6_oif = np->mcast_oif;
541 	else if (!fl6.flowi6_oif)
542 		fl6.flowi6_oif = np->ucast_oif;
543 
544 	ipc6.tclass = np->tclass;
545 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
546 
547 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
548 	if (IS_ERR(dst))
549 		goto out;
550 
551 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
552 	ipc6.dontfrag = np->dontfrag;
553 	ipc6.opt = NULL;
554 
555 	msg.skb = skb;
556 	msg.offset = skb_network_offset(skb);
557 	msg.type = type;
558 
559 	len = skb->len - msg.offset;
560 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
561 	if (len < 0) {
562 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
563 				    &hdr->saddr, &hdr->daddr);
564 		goto out_dst_release;
565 	}
566 
567 	rcu_read_lock();
568 	idev = __in6_dev_get(skb->dev);
569 
570 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
571 			      len + sizeof(struct icmp6hdr),
572 			      sizeof(struct icmp6hdr),
573 			      &ipc6, &fl6, (struct rt6_info *)dst,
574 			      MSG_DONTWAIT, &sockc_unused);
575 	if (err) {
576 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
577 		ip6_flush_pending_frames(sk);
578 	} else {
579 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
580 						 len + sizeof(struct icmp6hdr));
581 	}
582 	rcu_read_unlock();
583 out_dst_release:
584 	dst_release(dst);
585 out:
586 	icmpv6_xmit_unlock(sk);
587 out_bh_enable:
588 	local_bh_enable();
589 }
590 
/* Slightly more convenient version of icmp6_send: sends an
 * ICMPV6_PARAMPROB message with @code and pointer @pos in response to
 * @skb, then frees @skb, so the caller must not touch it afterwards.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}
598 
599 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
600  * if sufficient data bytes are available
601  * @nhs is the size of the tunnel header(s) :
602  *  Either an IPv4 header for SIT encap
603  *         an IPv4 header + GRE header for GRE encap
604  */
605 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
606 			       unsigned int data_len)
607 {
608 	struct in6_addr temp_saddr;
609 	struct rt6_info *rt;
610 	struct sk_buff *skb2;
611 	u32 info = 0;
612 
613 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
614 		return 1;
615 
616 	/* RFC 4884 (partial) support for ICMP extensions */
617 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
618 		data_len = 0;
619 
620 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
621 
622 	if (!skb2)
623 		return 1;
624 
625 	skb_dst_drop(skb2);
626 	skb_pull(skb2, nhs);
627 	skb_reset_network_header(skb2);
628 
629 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
630 
631 	if (rt && rt->dst.dev)
632 		skb2->dev = rt->dst.dev;
633 
634 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
635 
636 	if (data_len) {
637 		/* RFC 4884 (partial) support :
638 		 * insert 0 padding at the end, before the extensions
639 		 */
640 		__skb_push(skb2, nhs);
641 		skb_reset_network_header(skb2);
642 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
643 		memset(skb2->data + data_len - nhs, 0, nhs);
644 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
645 		 * and stored in reserved[0]
646 		 */
647 		info = (data_len/8) << 24;
648 	}
649 	if (type == ICMP_TIME_EXCEEDED)
650 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
651 			   info, &temp_saddr);
652 	else
653 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
654 			   info, &temp_saddr);
655 	if (rt)
656 		ip6_rt_put(rt);
657 
658 	kfree_skb(skb2);
659 
660 	return 0;
661 }
662 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
663 
664 static void icmpv6_echo_reply(struct sk_buff *skb)
665 {
666 	struct net *net = dev_net(skb->dev);
667 	struct sock *sk;
668 	struct inet6_dev *idev;
669 	struct ipv6_pinfo *np;
670 	const struct in6_addr *saddr = NULL;
671 	struct icmp6hdr *icmph = icmp6_hdr(skb);
672 	struct icmp6hdr tmp_hdr;
673 	struct flowi6 fl6;
674 	struct icmpv6_msg msg;
675 	struct dst_entry *dst;
676 	struct ipcm6_cookie ipc6;
677 	int err = 0;
678 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
679 	struct sockcm_cookie sockc_unused = {0};
680 
681 	saddr = &ipv6_hdr(skb)->daddr;
682 
683 	if (!ipv6_unicast_destination(skb) &&
684 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
685 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
686 		saddr = NULL;
687 
688 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
689 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
690 
691 	memset(&fl6, 0, sizeof(fl6));
692 	fl6.flowi6_proto = IPPROTO_ICMPV6;
693 	fl6.daddr = ipv6_hdr(skb)->saddr;
694 	if (saddr)
695 		fl6.saddr = *saddr;
696 	fl6.flowi6_oif = skb->dev->ifindex;
697 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
698 	fl6.flowi6_mark = mark;
699 	fl6.flowi6_uid = sock_net_uid(net, NULL);
700 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
701 
702 	local_bh_disable();
703 	sk = icmpv6_xmit_lock(net);
704 	if (!sk)
705 		goto out_bh_enable;
706 	sk->sk_mark = mark;
707 	np = inet6_sk(sk);
708 
709 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
710 		fl6.flowi6_oif = np->mcast_oif;
711 	else if (!fl6.flowi6_oif)
712 		fl6.flowi6_oif = np->ucast_oif;
713 
714 	err = ip6_dst_lookup(net, sk, &dst, &fl6);
715 	if (err)
716 		goto out;
717 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
718 	if (IS_ERR(dst))
719 		goto out;
720 
721 	idev = __in6_dev_get(skb->dev);
722 
723 	msg.skb = skb;
724 	msg.offset = 0;
725 	msg.type = ICMPV6_ECHO_REPLY;
726 
727 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
728 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
729 	ipc6.dontfrag = np->dontfrag;
730 	ipc6.opt = NULL;
731 
732 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
733 				sizeof(struct icmp6hdr), &ipc6, &fl6,
734 				(struct rt6_info *)dst, MSG_DONTWAIT,
735 				&sockc_unused);
736 
737 	if (err) {
738 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
739 		ip6_flush_pending_frames(sk);
740 	} else {
741 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
742 						 skb->len + sizeof(struct icmp6hdr));
743 	}
744 	dst_release(dst);
745 out:
746 	icmpv6_xmit_unlock(sk);
747 out_bh_enable:
748 	local_bh_enable();
749 }
750 
751 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
752 {
753 	const struct inet6_protocol *ipprot;
754 	int inner_offset;
755 	__be16 frag_off;
756 	u8 nexthdr;
757 	struct net *net = dev_net(skb->dev);
758 
759 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
760 		goto out;
761 
762 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
763 	if (ipv6_ext_hdr(nexthdr)) {
764 		/* now skip over extension headers */
765 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
766 						&nexthdr, &frag_off);
767 		if (inner_offset < 0)
768 			goto out;
769 	} else {
770 		inner_offset = sizeof(struct ipv6hdr);
771 	}
772 
773 	/* Checkin header including 8 bytes of inner protocol header. */
774 	if (!pskb_may_pull(skb, inner_offset+8))
775 		goto out;
776 
777 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
778 	   Without this we will not able f.e. to make source routed
779 	   pmtu discovery.
780 	   Corresponding argument (opt) to notifiers is already added.
781 	   --ANK (980726)
782 	 */
783 
784 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
785 	if (ipprot && ipprot->err_handler)
786 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
787 
788 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
789 	return;
790 
791 out:
792 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
793 }
794 
795 /*
796  *	Handle icmp messages
797  */
798 
799 static int icmpv6_rcv(struct sk_buff *skb)
800 {
801 	struct net_device *dev = skb->dev;
802 	struct inet6_dev *idev = __in6_dev_get(dev);
803 	const struct in6_addr *saddr, *daddr;
804 	struct icmp6hdr *hdr;
805 	u8 type;
806 	bool success = false;
807 
808 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
809 		struct sec_path *sp = skb_sec_path(skb);
810 		int nh;
811 
812 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
813 				 XFRM_STATE_ICMP))
814 			goto drop_no_count;
815 
816 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
817 			goto drop_no_count;
818 
819 		nh = skb_network_offset(skb);
820 		skb_set_network_header(skb, sizeof(*hdr));
821 
822 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
823 			goto drop_no_count;
824 
825 		skb_set_network_header(skb, nh);
826 	}
827 
828 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
829 
830 	saddr = &ipv6_hdr(skb)->saddr;
831 	daddr = &ipv6_hdr(skb)->daddr;
832 
833 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
834 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
835 				    saddr, daddr);
836 		goto csum_error;
837 	}
838 
839 	if (!pskb_pull(skb, sizeof(*hdr)))
840 		goto discard_it;
841 
842 	hdr = icmp6_hdr(skb);
843 
844 	type = hdr->icmp6_type;
845 
846 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
847 
848 	switch (type) {
849 	case ICMPV6_ECHO_REQUEST:
850 		icmpv6_echo_reply(skb);
851 		break;
852 
853 	case ICMPV6_ECHO_REPLY:
854 		success = ping_rcv(skb);
855 		break;
856 
857 	case ICMPV6_PKT_TOOBIG:
858 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
859 		   standard destination cache. Seems, only "advanced"
860 		   destination cache will allow to solve this problem
861 		   --ANK (980726)
862 		 */
863 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
864 			goto discard_it;
865 		hdr = icmp6_hdr(skb);
866 
867 		/*
868 		 *	Drop through to notify
869 		 */
870 
871 	case ICMPV6_DEST_UNREACH:
872 	case ICMPV6_TIME_EXCEED:
873 	case ICMPV6_PARAMPROB:
874 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
875 		break;
876 
877 	case NDISC_ROUTER_SOLICITATION:
878 	case NDISC_ROUTER_ADVERTISEMENT:
879 	case NDISC_NEIGHBOUR_SOLICITATION:
880 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
881 	case NDISC_REDIRECT:
882 		ndisc_rcv(skb);
883 		break;
884 
885 	case ICMPV6_MGM_QUERY:
886 		igmp6_event_query(skb);
887 		break;
888 
889 	case ICMPV6_MGM_REPORT:
890 		igmp6_event_report(skb);
891 		break;
892 
893 	case ICMPV6_MGM_REDUCTION:
894 	case ICMPV6_NI_QUERY:
895 	case ICMPV6_NI_REPLY:
896 	case ICMPV6_MLD2_REPORT:
897 	case ICMPV6_DHAAD_REQUEST:
898 	case ICMPV6_DHAAD_REPLY:
899 	case ICMPV6_MOBILE_PREFIX_SOL:
900 	case ICMPV6_MOBILE_PREFIX_ADV:
901 		break;
902 
903 	default:
904 		/* informational */
905 		if (type & ICMPV6_INFOMSG_MASK)
906 			break;
907 
908 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
909 				    saddr, daddr);
910 
911 		/*
912 		 * error of unknown type.
913 		 * must pass to upper level
914 		 */
915 
916 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
917 	}
918 
919 	/* until the v6 path can be better sorted assume failure and
920 	 * preserve the status quo behaviour for the rest of the paths to here
921 	 */
922 	if (success)
923 		consume_skb(skb);
924 	else
925 		kfree_skb(skb);
926 
927 	return 0;
928 
929 csum_error:
930 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
931 discard_it:
932 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
933 drop_no_count:
934 	kfree_skb(skb);
935 	return 0;
936 }
937 
938 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
939 		      u8 type,
940 		      const struct in6_addr *saddr,
941 		      const struct in6_addr *daddr,
942 		      int oif)
943 {
944 	memset(fl6, 0, sizeof(*fl6));
945 	fl6->saddr = *saddr;
946 	fl6->daddr = *daddr;
947 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
948 	fl6->fl6_icmp_type	= type;
949 	fl6->fl6_icmp_code	= 0;
950 	fl6->flowi6_oif		= oif;
951 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
952 }
953 
954 static int __net_init icmpv6_sk_init(struct net *net)
955 {
956 	struct sock *sk;
957 	int err, i, j;
958 
959 	net->ipv6.icmp_sk =
960 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
961 	if (!net->ipv6.icmp_sk)
962 		return -ENOMEM;
963 
964 	for_each_possible_cpu(i) {
965 		err = inet_ctl_sock_create(&sk, PF_INET6,
966 					   SOCK_RAW, IPPROTO_ICMPV6, net);
967 		if (err < 0) {
968 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
969 			       err);
970 			goto fail;
971 		}
972 
973 		net->ipv6.icmp_sk[i] = sk;
974 
975 		/* Enough space for 2 64K ICMP packets, including
976 		 * sk_buff struct overhead.
977 		 */
978 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
979 	}
980 	return 0;
981 
982  fail:
983 	for (j = 0; j < i; j++)
984 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
985 	kfree(net->ipv6.icmp_sk);
986 	return err;
987 }
988 
989 static void __net_exit icmpv6_sk_exit(struct net *net)
990 {
991 	int i;
992 
993 	for_each_possible_cpu(i) {
994 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
995 	}
996 	kfree(net->ipv6.icmp_sk);
997 }
998 
/* Per-namespace setup/teardown of the ICMPv6 sockets; registered by icmpv6_init(). */
static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};
1003 
1004 int __init icmpv6_init(void)
1005 {
1006 	int err;
1007 
1008 	err = register_pernet_subsys(&icmpv6_sk_ops);
1009 	if (err < 0)
1010 		return err;
1011 
1012 	err = -EAGAIN;
1013 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1014 		goto fail;
1015 
1016 	err = inet6_register_icmp_sender(icmp6_send);
1017 	if (err)
1018 		goto sender_reg_err;
1019 	return 0;
1020 
1021 sender_reg_err:
1022 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1023 fail:
1024 	pr_err("Failed to register ICMP6 protocol\n");
1025 	unregister_pernet_subsys(&icmpv6_sk_ops);
1026 	return err;
1027 }
1028 
/*
 * Undo icmpv6_init(): unregister the ICMP sender, the per-namespace
 * operations and the IPPROTO_ICMPV6 protocol handler.
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1035 
1036 
/*
 * Translation table for ICMPV6_DEST_UNREACH, indexed by the ICMPv6 code:
 * the errno value to report and whether the error is fatal.
 * Used by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	[ICMPV6_NOROUTE]	= { .err = ENETUNREACH,	 .fatal = 0 },
	[ICMPV6_ADM_PROHIBITED]	= { .err = EACCES,	 .fatal = 1 },
	/* Was NOT_NEIGHBOUR, now reserved */
	[ICMPV6_NOT_NEIGHBOUR]	= { .err = EHOSTUNREACH, .fatal = 0 },
	[ICMPV6_ADDR_UNREACH]	= { .err = EHOSTUNREACH, .fatal = 0 },
	[ICMPV6_PORT_UNREACH]	= { .err = ECONNREFUSED, .fatal = 1 },
	[ICMPV6_POLICY_FAIL]	= { .err = EACCES,	 .fatal = 1 },
	[ICMPV6_REJECT_ROUTE]	= { .err = EACCES,	 .fatal = 1 },
};
1070 
1071 int icmpv6_err_convert(u8 type, u8 code, int *err)
1072 {
1073 	int fatal = 0;
1074 
1075 	*err = EPROTO;
1076 
1077 	switch (type) {
1078 	case ICMPV6_DEST_UNREACH:
1079 		fatal = 1;
1080 		if (code < ARRAY_SIZE(tab_unreach)) {
1081 			*err  = tab_unreach[code].err;
1082 			fatal = tab_unreach[code].fatal;
1083 		}
1084 		break;
1085 
1086 	case ICMPV6_PKT_TOOBIG:
1087 		*err = EMSGSIZE;
1088 		break;
1089 
1090 	case ICMPV6_PARAMPROB:
1091 		*err = EPROTO;
1092 		fatal = 1;
1093 		break;
1094 
1095 	case ICMPV6_TIME_EXCEED:
1096 		*err = EHOSTUNREACH;
1097 		break;
1098 	}
1099 
1100 	return fatal;
1101 }
1102 EXPORT_SYMBOL(icmpv6_err_convert);
1103 
1104 #ifdef CONFIG_SYSCTL
/*
 * Template for the ICMPv6 sysctl table. The "ratelimit" entry points at
 * init_net's icmpv6_time here; ipv6_icmp_sysctl_init() duplicates the table
 * and redirects the entry to the namespace it is called for.
 * NOTE(review): proc_dointvec_ms_jiffies presumably presents the stored
 * jiffies value in milliseconds - confirm.
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },
};
1115 
1116 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1117 {
1118 	struct ctl_table *table;
1119 
1120 	table = kmemdup(ipv6_icmp_table_template,
1121 			sizeof(ipv6_icmp_table_template),
1122 			GFP_KERNEL);
1123 
1124 	if (table)
1125 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1126 
1127 	return table;
1128 }
1129 #endif
1130