xref: /openbmc/linux/net/ipv6/icmp.c (revision 0b03a5ca)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure that parameter problem is sent for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return *this_cpu_ptr(net->ipv6.icmp_sk);
85 }
86 
87 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 
104 	return 0;
105 }
106 
107 static int icmpv6_rcv(struct sk_buff *skb);
108 
109 static const struct inet6_protocol icmpv6_protocol = {
110 	.handler	=	icmpv6_rcv,
111 	.err_handler	=	icmpv6_err,
112 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
113 };
114 
115 /* Called with BH disabled */
116 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
117 {
118 	struct sock *sk;
119 
120 	sk = icmpv6_sk(net);
121 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
122 		/* This can happen if the output path (f.e. SIT or
123 		 * ip6ip6 tunnel) signals dst_link_failure() for an
124 		 * outgoing ICMP6 packet.
125 		 */
126 		return NULL;
127 	}
128 	return sk;
129 }
130 
131 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
132 {
133 	spin_unlock(&sk->sk_lock.slock);
134 }
135 
136 /*
137  * Figure out, may we reply to this packet with icmp error.
138  *
139  * We do not reply, if:
140  *	- it was icmp error message.
141  *	- it is truncated, so that it is known, that protocol is ICMPV6
142  *	  (i.e. in the middle of some exthdr)
143  *
144  *	--ANK (980726)
145  */
146 
147 static bool is_ineligible(const struct sk_buff *skb)
148 {
149 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
150 	int len = skb->len - ptr;
151 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
152 	__be16 frag_off;
153 
154 	if (len < 0)
155 		return true;
156 
157 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
158 	if (ptr < 0)
159 		return false;
160 	if (nexthdr == IPPROTO_ICMPV6) {
161 		u8 _type, *tp;
162 		tp = skb_header_pointer(skb,
163 			ptr+offsetof(struct icmp6hdr, icmp6_type),
164 			sizeof(_type), &_type);
165 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
166 			return true;
167 	}
168 	return false;
169 }
170 
171 static bool icmpv6_mask_allow(int type)
172 {
173 	/* Informational messages are not limited. */
174 	if (type & ICMPV6_INFOMSG_MASK)
175 		return true;
176 
177 	/* Do not limit pmtu discovery, it would break it. */
178 	if (type == ICMPV6_PKT_TOOBIG)
179 		return true;
180 
181 	return false;
182 }
183 
184 static bool icmpv6_global_allow(int type)
185 {
186 	if (icmpv6_mask_allow(type))
187 		return true;
188 
189 	if (icmp_global_allow())
190 		return true;
191 
192 	return false;
193 }
194 
195 /*
196  * Check the ICMP output rate limit
197  */
198 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
199 			       struct flowi6 *fl6)
200 {
201 	struct net *net = sock_net(sk);
202 	struct dst_entry *dst;
203 	bool res = false;
204 
205 	if (icmpv6_mask_allow(type))
206 		return true;
207 
208 	/*
209 	 * Look up the output route.
210 	 * XXX: perhaps the expire for routing entries cloned by
211 	 * this lookup should be more aggressive (not longer than timeout).
212 	 */
213 	dst = ip6_route_output(net, sk, fl6);
214 	if (dst->error) {
215 		IP6_INC_STATS(net, ip6_dst_idev(dst),
216 			      IPSTATS_MIB_OUTNOROUTES);
217 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
218 		res = true;
219 	} else {
220 		struct rt6_info *rt = (struct rt6_info *)dst;
221 		int tmo = net->ipv6.sysctl.icmpv6_time;
222 		struct inet_peer *peer;
223 
224 		/* Give more bandwidth to wider prefixes. */
225 		if (rt->rt6i_dst.plen < 128)
226 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
227 
228 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
229 		res = inet_peer_xrlim_allow(peer, tmo);
230 		if (peer)
231 			inet_putpeer(peer);
232 	}
233 	dst_release(dst);
234 	return res;
235 }
236 
237 /*
238  *	an inline helper for the "simple" if statement below
239  *	checks if parameter problem report is caused by an
240  *	unrecognized IPv6 option that has the Option Type
241  *	highest-order two bits set to 10
242  */
243 
244 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
245 {
246 	u8 _optval, *op;
247 
248 	offset += skb_network_offset(skb);
249 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
250 	if (!op)
251 		return true;
252 	return (*op & 0xC0) == 0x80;
253 }
254 
255 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
256 				struct icmp6hdr *thdr, int len)
257 {
258 	struct sk_buff *skb;
259 	struct icmp6hdr *icmp6h;
260 
261 	skb = skb_peek(&sk->sk_write_queue);
262 	if (!skb)
263 		return;
264 
265 	icmp6h = icmp6_hdr(skb);
266 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
267 	icmp6h->icmp6_cksum = 0;
268 
269 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
270 		skb->csum = csum_partial(icmp6h,
271 					sizeof(struct icmp6hdr), skb->csum);
272 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
273 						      &fl6->daddr,
274 						      len, fl6->flowi6_proto,
275 						      skb->csum);
276 	} else {
277 		__wsum tmp_csum = 0;
278 
279 		skb_queue_walk(&sk->sk_write_queue, skb) {
280 			tmp_csum = csum_add(tmp_csum, skb->csum);
281 		}
282 
283 		tmp_csum = csum_partial(icmp6h,
284 					sizeof(struct icmp6hdr), tmp_csum);
285 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
286 						      &fl6->daddr,
287 						      len, fl6->flowi6_proto,
288 						      tmp_csum);
289 	}
290 	ip6_push_pending_frames(sk);
291 }
292 
/* Cookie handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
	struct sk_buff *skb;	/* buffer the payload is copied from */
	int offset;		/* start of the payload within skb */
	uint8_t type;		/* ICMPv6 type of the outgoing message */
};
298 
299 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
300 {
301 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
302 	struct sk_buff *org_skb = msg->skb;
303 	__wsum csum = 0;
304 
305 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
306 				      to, len, csum);
307 	skb->csum = csum_block_add(skb->csum, csum, odd);
308 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
309 		nf_ct_attach(skb, org_skb);
310 	return 0;
311 }
312 
313 #if IS_ENABLED(CONFIG_IPV6_MIP6)
314 static void mip6_addr_swap(struct sk_buff *skb)
315 {
316 	struct ipv6hdr *iph = ipv6_hdr(skb);
317 	struct inet6_skb_parm *opt = IP6CB(skb);
318 	struct ipv6_destopt_hao *hao;
319 	struct in6_addr tmp;
320 	int off;
321 
322 	if (opt->dsthao) {
323 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
324 		if (likely(off >= 0)) {
325 			hao = (struct ipv6_destopt_hao *)
326 					(skb_network_header(skb) + off);
327 			tmp = iph->saddr;
328 			iph->saddr = hao->addr;
329 			hao->addr = tmp;
330 		}
331 	}
332 }
333 #else
334 static inline void mip6_addr_swap(struct sk_buff *skb) {}
335 #endif
336 
337 static struct dst_entry *icmpv6_route_lookup(struct net *net,
338 					     struct sk_buff *skb,
339 					     struct sock *sk,
340 					     struct flowi6 *fl6)
341 {
342 	struct dst_entry *dst, *dst2;
343 	struct flowi6 fl2;
344 	int err;
345 
346 	err = ip6_dst_lookup(net, sk, &dst, fl6);
347 	if (err)
348 		return ERR_PTR(err);
349 
350 	/*
351 	 * We won't send icmp if the destination is known
352 	 * anycast.
353 	 */
354 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
355 		net_dbg_ratelimited("icmp6_send: acast source\n");
356 		dst_release(dst);
357 		return ERR_PTR(-EINVAL);
358 	}
359 
360 	/* No need to clone since we're just using its address. */
361 	dst2 = dst;
362 
363 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
364 	if (!IS_ERR(dst)) {
365 		if (dst != dst2)
366 			return dst;
367 	} else {
368 		if (PTR_ERR(dst) == -EPERM)
369 			dst = NULL;
370 		else
371 			return dst;
372 	}
373 
374 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
375 	if (err)
376 		goto relookup_failed;
377 
378 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
379 	if (err)
380 		goto relookup_failed;
381 
382 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
383 	if (!IS_ERR(dst2)) {
384 		dst_release(dst);
385 		dst = dst2;
386 	} else {
387 		err = PTR_ERR(dst2);
388 		if (err == -EPERM) {
389 			dst_release(dst);
390 			return dst2;
391 		} else
392 			goto relookup_failed;
393 	}
394 
395 relookup_failed:
396 	if (dst)
397 		return dst;
398 	return ERR_PTR(err);
399 }
400 
401 static int icmp6_iif(const struct sk_buff *skb)
402 {
403 	int iif = skb->dev->ifindex;
404 
405 	/* for local traffic to local address, skb dev is the loopback
406 	 * device. Check if there is a dst attached to the skb and if so
407 	 * get the real device index. Same is needed for replies to a link
408 	 * local address on a device enslaved to an L3 master device
409 	 */
410 	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
411 		const struct rt6_info *rt6 = skb_rt6_info(skb);
412 
413 		if (rt6)
414 			iif = rt6->rt6i_idev->dev->ifindex;
415 	}
416 
417 	return iif;
418 }
419 
420 /*
421  *	Send an ICMP message in response to a packet in error
422  */
423 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
424 		       const struct in6_addr *force_saddr)
425 {
426 	struct inet6_dev *idev = NULL;
427 	struct ipv6hdr *hdr = ipv6_hdr(skb);
428 	struct sock *sk;
429 	struct net *net;
430 	struct ipv6_pinfo *np;
431 	const struct in6_addr *saddr = NULL;
432 	struct dst_entry *dst;
433 	struct icmp6hdr tmp_hdr;
434 	struct flowi6 fl6;
435 	struct icmpv6_msg msg;
436 	struct ipcm6_cookie ipc6;
437 	int iif = 0;
438 	int addr_type = 0;
439 	int len;
440 	u32 mark;
441 
442 	if ((u8 *)hdr < skb->head ||
443 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
444 		return;
445 
446 	if (!skb->dev)
447 		return;
448 	net = dev_net(skb->dev);
449 	mark = IP6_REPLY_MARK(net, skb->mark);
450 	/*
451 	 *	Make sure we respect the rules
452 	 *	i.e. RFC 1885 2.4(e)
453 	 *	Rule (e.1) is enforced by not using icmp6_send
454 	 *	in any code that processes icmp errors.
455 	 */
456 	addr_type = ipv6_addr_type(&hdr->daddr);
457 
458 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
459 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
460 		saddr = &hdr->daddr;
461 
462 	/*
463 	 *	Dest addr check
464 	 */
465 
466 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
467 		if (type != ICMPV6_PKT_TOOBIG &&
468 		    !(type == ICMPV6_PARAMPROB &&
469 		      code == ICMPV6_UNK_OPTION &&
470 		      (opt_unrec(skb, info))))
471 			return;
472 
473 		saddr = NULL;
474 	}
475 
476 	addr_type = ipv6_addr_type(&hdr->saddr);
477 
478 	/*
479 	 *	Source addr check
480 	 */
481 
482 	if (__ipv6_addr_needs_scope_id(addr_type)) {
483 		iif = icmp6_iif(skb);
484 	} else {
485 		dst = skb_dst(skb);
486 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
487 	}
488 
489 	/*
490 	 *	Must not send error if the source does not uniquely
491 	 *	identify a single node (RFC2463 Section 2.4).
492 	 *	We check unspecified / multicast addresses here,
493 	 *	and anycast addresses will be checked later.
494 	 */
495 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
496 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
497 				    &hdr->saddr, &hdr->daddr);
498 		return;
499 	}
500 
501 	/*
502 	 *	Never answer to a ICMP packet.
503 	 */
504 	if (is_ineligible(skb)) {
505 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
506 				    &hdr->saddr, &hdr->daddr);
507 		return;
508 	}
509 
510 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
511 	local_bh_disable();
512 
513 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
514 	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
515 		goto out_bh_enable;
516 
517 	mip6_addr_swap(skb);
518 
519 	memset(&fl6, 0, sizeof(fl6));
520 	fl6.flowi6_proto = IPPROTO_ICMPV6;
521 	fl6.daddr = hdr->saddr;
522 	if (force_saddr)
523 		saddr = force_saddr;
524 	if (saddr)
525 		fl6.saddr = *saddr;
526 	fl6.flowi6_mark = mark;
527 	fl6.flowi6_oif = iif;
528 	fl6.fl6_icmp_type = type;
529 	fl6.fl6_icmp_code = code;
530 	fl6.flowi6_uid = sock_net_uid(net, NULL);
531 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
532 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
533 
534 	sk = icmpv6_xmit_lock(net);
535 	if (!sk)
536 		goto out_bh_enable;
537 
538 	sk->sk_mark = mark;
539 	np = inet6_sk(sk);
540 
541 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
542 		goto out;
543 
544 	tmp_hdr.icmp6_type = type;
545 	tmp_hdr.icmp6_code = code;
546 	tmp_hdr.icmp6_cksum = 0;
547 	tmp_hdr.icmp6_pointer = htonl(info);
548 
549 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
550 		fl6.flowi6_oif = np->mcast_oif;
551 	else if (!fl6.flowi6_oif)
552 		fl6.flowi6_oif = np->ucast_oif;
553 
554 	ipcm6_init_sk(&ipc6, np);
555 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
556 
557 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
558 	if (IS_ERR(dst))
559 		goto out;
560 
561 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
562 
563 	msg.skb = skb;
564 	msg.offset = skb_network_offset(skb);
565 	msg.type = type;
566 
567 	len = skb->len - msg.offset;
568 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
569 	if (len < 0) {
570 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
571 				    &hdr->saddr, &hdr->daddr);
572 		goto out_dst_release;
573 	}
574 
575 	rcu_read_lock();
576 	idev = __in6_dev_get(skb->dev);
577 
578 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
579 			    len + sizeof(struct icmp6hdr),
580 			    sizeof(struct icmp6hdr),
581 			    &ipc6, &fl6, (struct rt6_info *)dst,
582 			    MSG_DONTWAIT)) {
583 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
584 		ip6_flush_pending_frames(sk);
585 	} else {
586 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
587 					   len + sizeof(struct icmp6hdr));
588 	}
589 	rcu_read_unlock();
590 out_dst_release:
591 	dst_release(dst);
592 out:
593 	icmpv6_xmit_unlock(sk);
594 out_bh_enable:
595 	local_bh_enable();
596 }
597 
598 /* Slightly more convenient version of icmp6_send.
599  */
600 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
601 {
602 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
603 	kfree_skb(skb);
604 }
605 
606 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
607  * if sufficient data bytes are available
608  * @nhs is the size of the tunnel header(s) :
609  *  Either an IPv4 header for SIT encap
610  *         an IPv4 header + GRE header for GRE encap
611  */
612 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
613 			       unsigned int data_len)
614 {
615 	struct in6_addr temp_saddr;
616 	struct rt6_info *rt;
617 	struct sk_buff *skb2;
618 	u32 info = 0;
619 
620 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
621 		return 1;
622 
623 	/* RFC 4884 (partial) support for ICMP extensions */
624 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
625 		data_len = 0;
626 
627 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
628 
629 	if (!skb2)
630 		return 1;
631 
632 	skb_dst_drop(skb2);
633 	skb_pull(skb2, nhs);
634 	skb_reset_network_header(skb2);
635 
636 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
637 			skb, 0);
638 
639 	if (rt && rt->dst.dev)
640 		skb2->dev = rt->dst.dev;
641 
642 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
643 
644 	if (data_len) {
645 		/* RFC 4884 (partial) support :
646 		 * insert 0 padding at the end, before the extensions
647 		 */
648 		__skb_push(skb2, nhs);
649 		skb_reset_network_header(skb2);
650 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
651 		memset(skb2->data + data_len - nhs, 0, nhs);
652 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
653 		 * and stored in reserved[0]
654 		 */
655 		info = (data_len/8) << 24;
656 	}
657 	if (type == ICMP_TIME_EXCEEDED)
658 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
659 			   info, &temp_saddr);
660 	else
661 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
662 			   info, &temp_saddr);
663 	if (rt)
664 		ip6_rt_put(rt);
665 
666 	kfree_skb(skb2);
667 
668 	return 0;
669 }
670 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
671 
672 static void icmpv6_echo_reply(struct sk_buff *skb)
673 {
674 	struct net *net = dev_net(skb->dev);
675 	struct sock *sk;
676 	struct inet6_dev *idev;
677 	struct ipv6_pinfo *np;
678 	const struct in6_addr *saddr = NULL;
679 	struct icmp6hdr *icmph = icmp6_hdr(skb);
680 	struct icmp6hdr tmp_hdr;
681 	struct flowi6 fl6;
682 	struct icmpv6_msg msg;
683 	struct dst_entry *dst;
684 	struct ipcm6_cookie ipc6;
685 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
686 	bool acast;
687 
688 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
689 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
690 		return;
691 
692 	saddr = &ipv6_hdr(skb)->daddr;
693 
694 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
695 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
696 		return;
697 
698 	if (!ipv6_unicast_destination(skb) &&
699 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
700 		saddr = NULL;
701 
702 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
703 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
704 
705 	memset(&fl6, 0, sizeof(fl6));
706 	fl6.flowi6_proto = IPPROTO_ICMPV6;
707 	fl6.daddr = ipv6_hdr(skb)->saddr;
708 	if (saddr)
709 		fl6.saddr = *saddr;
710 	fl6.flowi6_oif = icmp6_iif(skb);
711 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
712 	fl6.flowi6_mark = mark;
713 	fl6.flowi6_uid = sock_net_uid(net, NULL);
714 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
715 
716 	local_bh_disable();
717 	sk = icmpv6_xmit_lock(net);
718 	if (!sk)
719 		goto out_bh_enable;
720 	sk->sk_mark = mark;
721 	np = inet6_sk(sk);
722 
723 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
724 		fl6.flowi6_oif = np->mcast_oif;
725 	else if (!fl6.flowi6_oif)
726 		fl6.flowi6_oif = np->ucast_oif;
727 
728 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
729 		goto out;
730 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
731 	if (IS_ERR(dst))
732 		goto out;
733 
734 	idev = __in6_dev_get(skb->dev);
735 
736 	msg.skb = skb;
737 	msg.offset = 0;
738 	msg.type = ICMPV6_ECHO_REPLY;
739 
740 	ipcm6_init_sk(&ipc6, np);
741 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
742 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
743 
744 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
745 			    skb->len + sizeof(struct icmp6hdr),
746 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
747 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
748 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
749 		ip6_flush_pending_frames(sk);
750 	} else {
751 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
752 					   skb->len + sizeof(struct icmp6hdr));
753 	}
754 	dst_release(dst);
755 out:
756 	icmpv6_xmit_unlock(sk);
757 out_bh_enable:
758 	local_bh_enable();
759 }
760 
761 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
762 {
763 	const struct inet6_protocol *ipprot;
764 	int inner_offset;
765 	__be16 frag_off;
766 	u8 nexthdr;
767 	struct net *net = dev_net(skb->dev);
768 
769 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
770 		goto out;
771 
772 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
773 	if (ipv6_ext_hdr(nexthdr)) {
774 		/* now skip over extension headers */
775 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
776 						&nexthdr, &frag_off);
777 		if (inner_offset < 0)
778 			goto out;
779 	} else {
780 		inner_offset = sizeof(struct ipv6hdr);
781 	}
782 
783 	/* Checkin header including 8 bytes of inner protocol header. */
784 	if (!pskb_may_pull(skb, inner_offset+8))
785 		goto out;
786 
787 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
788 	   Without this we will not able f.e. to make source routed
789 	   pmtu discovery.
790 	   Corresponding argument (opt) to notifiers is already added.
791 	   --ANK (980726)
792 	 */
793 
794 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
795 	if (ipprot && ipprot->err_handler)
796 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
797 
798 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
799 	return;
800 
801 out:
802 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
803 }
804 
805 /*
806  *	Handle icmp messages
807  */
808 
809 static int icmpv6_rcv(struct sk_buff *skb)
810 {
811 	struct net *net = dev_net(skb->dev);
812 	struct net_device *dev = skb->dev;
813 	struct inet6_dev *idev = __in6_dev_get(dev);
814 	const struct in6_addr *saddr, *daddr;
815 	struct icmp6hdr *hdr;
816 	u8 type;
817 	bool success = false;
818 
819 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
820 		struct sec_path *sp = skb_sec_path(skb);
821 		int nh;
822 
823 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
824 				 XFRM_STATE_ICMP))
825 			goto drop_no_count;
826 
827 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
828 			goto drop_no_count;
829 
830 		nh = skb_network_offset(skb);
831 		skb_set_network_header(skb, sizeof(*hdr));
832 
833 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
834 			goto drop_no_count;
835 
836 		skb_set_network_header(skb, nh);
837 	}
838 
839 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
840 
841 	saddr = &ipv6_hdr(skb)->saddr;
842 	daddr = &ipv6_hdr(skb)->daddr;
843 
844 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
845 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
846 				    saddr, daddr);
847 		goto csum_error;
848 	}
849 
850 	if (!pskb_pull(skb, sizeof(*hdr)))
851 		goto discard_it;
852 
853 	hdr = icmp6_hdr(skb);
854 
855 	type = hdr->icmp6_type;
856 
857 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
858 
859 	switch (type) {
860 	case ICMPV6_ECHO_REQUEST:
861 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
862 			icmpv6_echo_reply(skb);
863 		break;
864 
865 	case ICMPV6_ECHO_REPLY:
866 		success = ping_rcv(skb);
867 		break;
868 
869 	case ICMPV6_PKT_TOOBIG:
870 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
871 		   standard destination cache. Seems, only "advanced"
872 		   destination cache will allow to solve this problem
873 		   --ANK (980726)
874 		 */
875 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
876 			goto discard_it;
877 		hdr = icmp6_hdr(skb);
878 
879 		/* to notify */
880 		/* fall through */
881 	case ICMPV6_DEST_UNREACH:
882 	case ICMPV6_TIME_EXCEED:
883 	case ICMPV6_PARAMPROB:
884 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
885 		break;
886 
887 	case NDISC_ROUTER_SOLICITATION:
888 	case NDISC_ROUTER_ADVERTISEMENT:
889 	case NDISC_NEIGHBOUR_SOLICITATION:
890 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
891 	case NDISC_REDIRECT:
892 		ndisc_rcv(skb);
893 		break;
894 
895 	case ICMPV6_MGM_QUERY:
896 		igmp6_event_query(skb);
897 		break;
898 
899 	case ICMPV6_MGM_REPORT:
900 		igmp6_event_report(skb);
901 		break;
902 
903 	case ICMPV6_MGM_REDUCTION:
904 	case ICMPV6_NI_QUERY:
905 	case ICMPV6_NI_REPLY:
906 	case ICMPV6_MLD2_REPORT:
907 	case ICMPV6_DHAAD_REQUEST:
908 	case ICMPV6_DHAAD_REPLY:
909 	case ICMPV6_MOBILE_PREFIX_SOL:
910 	case ICMPV6_MOBILE_PREFIX_ADV:
911 		break;
912 
913 	default:
914 		/* informational */
915 		if (type & ICMPV6_INFOMSG_MASK)
916 			break;
917 
918 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
919 				    saddr, daddr);
920 
921 		/*
922 		 * error of unknown type.
923 		 * must pass to upper level
924 		 */
925 
926 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
927 	}
928 
929 	/* until the v6 path can be better sorted assume failure and
930 	 * preserve the status quo behaviour for the rest of the paths to here
931 	 */
932 	if (success)
933 		consume_skb(skb);
934 	else
935 		kfree_skb(skb);
936 
937 	return 0;
938 
939 csum_error:
940 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
941 discard_it:
942 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
943 drop_no_count:
944 	kfree_skb(skb);
945 	return 0;
946 }
947 
948 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
949 		      u8 type,
950 		      const struct in6_addr *saddr,
951 		      const struct in6_addr *daddr,
952 		      int oif)
953 {
954 	memset(fl6, 0, sizeof(*fl6));
955 	fl6->saddr = *saddr;
956 	fl6->daddr = *daddr;
957 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
958 	fl6->fl6_icmp_type	= type;
959 	fl6->fl6_icmp_code	= 0;
960 	fl6->flowi6_oif		= oif;
961 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
962 }
963 
964 static void __net_exit icmpv6_sk_exit(struct net *net)
965 {
966 	int i;
967 
968 	for_each_possible_cpu(i)
969 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
970 	free_percpu(net->ipv6.icmp_sk);
971 }
972 
973 static int __net_init icmpv6_sk_init(struct net *net)
974 {
975 	struct sock *sk;
976 	int err, i;
977 
978 	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
979 	if (!net->ipv6.icmp_sk)
980 		return -ENOMEM;
981 
982 	for_each_possible_cpu(i) {
983 		err = inet_ctl_sock_create(&sk, PF_INET6,
984 					   SOCK_RAW, IPPROTO_ICMPV6, net);
985 		if (err < 0) {
986 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
987 			       err);
988 			goto fail;
989 		}
990 
991 		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
992 
993 		/* Enough space for 2 64K ICMP packets, including
994 		 * sk_buff struct overhead.
995 		 */
996 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
997 	}
998 	return 0;
999 
1000  fail:
1001 	icmpv6_sk_exit(net);
1002 	return err;
1003 }
1004 
1005 static struct pernet_operations icmpv6_sk_ops = {
1006 	.init = icmpv6_sk_init,
1007 	.exit = icmpv6_sk_exit,
1008 };
1009 
1010 int __init icmpv6_init(void)
1011 {
1012 	int err;
1013 
1014 	err = register_pernet_subsys(&icmpv6_sk_ops);
1015 	if (err < 0)
1016 		return err;
1017 
1018 	err = -EAGAIN;
1019 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1020 		goto fail;
1021 
1022 	err = inet6_register_icmp_sender(icmp6_send);
1023 	if (err)
1024 		goto sender_reg_err;
1025 	return 0;
1026 
1027 sender_reg_err:
1028 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1029 fail:
1030 	pr_err("Failed to register ICMP6 protocol\n");
1031 	unregister_pernet_subsys(&icmpv6_sk_ops);
1032 	return err;
1033 }
1034 
1035 void icmpv6_cleanup(void)
1036 {
1037 	inet6_unregister_icmp_sender(icmp6_send);
1038 	unregister_pernet_subsys(&icmpv6_sk_ops);
1039 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1040 }
1041 
1042 
/*
 * Mapping of Destination Unreachable codes (used as the array index)
 * to an errno value and a "fatal" flag; consumed by
 * icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	/* NOROUTE */
	{ .err = ENETUNREACH,  .fatal = 0 },
	/* ADM_PROHIBITED */
	{ .err = EACCES,       .fatal = 1 },
	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH, .fatal = 0 },
	/* ADDR_UNREACH */
	{ .err = EHOSTUNREACH, .fatal = 0 },
	/* PORT_UNREACH */
	{ .err = ECONNREFUSED, .fatal = 1 },
	/* POLICY_FAIL */
	{ .err = EACCES,       .fatal = 1 },
	/* REJECT_ROUTE */
	{ .err = EACCES,       .fatal = 1 },
};
1076 
1077 int icmpv6_err_convert(u8 type, u8 code, int *err)
1078 {
1079 	int fatal = 0;
1080 
1081 	*err = EPROTO;
1082 
1083 	switch (type) {
1084 	case ICMPV6_DEST_UNREACH:
1085 		fatal = 1;
1086 		if (code < ARRAY_SIZE(tab_unreach)) {
1087 			*err  = tab_unreach[code].err;
1088 			fatal = tab_unreach[code].fatal;
1089 		}
1090 		break;
1091 
1092 	case ICMPV6_PKT_TOOBIG:
1093 		*err = EMSGSIZE;
1094 		break;
1095 
1096 	case ICMPV6_PARAMPROB:
1097 		*err = EPROTO;
1098 		fatal = 1;
1099 		break;
1100 
1101 	case ICMPV6_TIME_EXCEED:
1102 		*err = EHOSTUNREACH;
1103 		break;
1104 	}
1105 
1106 	return fatal;
1107 }
1108 EXPORT_SYMBOL(icmpv6_err_convert);
1109 
1110 #ifdef CONFIG_SYSCTL
1111 static struct ctl_table ipv6_icmp_table_template[] = {
1112 	{
1113 		.procname	= "ratelimit",
1114 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1115 		.maxlen		= sizeof(int),
1116 		.mode		= 0644,
1117 		.proc_handler	= proc_dointvec_ms_jiffies,
1118 	},
1119 	{
1120 		.procname	= "echo_ignore_all",
1121 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1122 		.maxlen		= sizeof(int),
1123 		.mode		= 0644,
1124 		.proc_handler = proc_dointvec,
1125 	},
1126 	{
1127 		.procname	= "echo_ignore_multicast",
1128 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1129 		.maxlen		= sizeof(int),
1130 		.mode		= 0644,
1131 		.proc_handler = proc_dointvec,
1132 	},
1133 	{
1134 		.procname	= "echo_ignore_anycast",
1135 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1136 		.maxlen		= sizeof(int),
1137 		.mode		= 0644,
1138 		.proc_handler = proc_dointvec,
1139 	},
1140 	{ },
1141 };
1142 
1143 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1144 {
1145 	struct ctl_table *table;
1146 
1147 	table = kmemdup(ipv6_icmp_table_template,
1148 			sizeof(ipv6_icmp_table_template),
1149 			GFP_KERNEL);
1150 
1151 	if (table) {
1152 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1153 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1154 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1155 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1156 	}
1157 	return table;
1158 }
1159 #endif
1160