xref: /openbmc/linux/net/ipv6/icmp.c (revision 83268fa6b43cefb60ee188fd53ed49120d3ae4f4)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to an icmp error.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure a parameter problem is sent for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 }
104 
105 static int icmpv6_rcv(struct sk_buff *skb);
106 
107 static const struct inet6_protocol icmpv6_protocol = {
108 	.handler	=	icmpv6_rcv,
109 	.err_handler	=	icmpv6_err,
110 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
111 };
112 
113 /* Called with BH disabled */
114 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
115 {
116 	struct sock *sk;
117 
118 	sk = icmpv6_sk(net);
119 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 		/* This can happen if the output path (f.e. SIT or
121 		 * ip6ip6 tunnel) signals dst_link_failure() for an
122 		 * outgoing ICMP6 packet.
123 		 */
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
129 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
130 {
131 	spin_unlock(&sk->sk_lock.slock);
132 }
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 	int len = skb->len - ptr;
149 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 	__be16 frag_off;
151 
152 	if (len < 0)
153 		return true;
154 
155 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 	if (ptr < 0)
157 		return false;
158 	if (nexthdr == IPPROTO_ICMPV6) {
159 		u8 _type, *tp;
160 		tp = skb_header_pointer(skb,
161 			ptr+offsetof(struct icmp6hdr, icmp6_type),
162 			sizeof(_type), &_type);
163 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164 			return true;
165 	}
166 	return false;
167 }
168 
169 static bool icmpv6_mask_allow(int type)
170 {
171 	/* Informational messages are not limited. */
172 	if (type & ICMPV6_INFOMSG_MASK)
173 		return true;
174 
175 	/* Do not limit pmtu discovery, it would break it. */
176 	if (type == ICMPV6_PKT_TOOBIG)
177 		return true;
178 
179 	return false;
180 }
181 
182 static bool icmpv6_global_allow(int type)
183 {
184 	if (icmpv6_mask_allow(type))
185 		return true;
186 
187 	if (icmp_global_allow())
188 		return true;
189 
190 	return false;
191 }
192 
193 /*
194  * Check the ICMP output rate limit
195  */
196 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
197 			       struct flowi6 *fl6)
198 {
199 	struct net *net = sock_net(sk);
200 	struct dst_entry *dst;
201 	bool res = false;
202 
203 	if (icmpv6_mask_allow(type))
204 		return true;
205 
206 	/*
207 	 * Look up the output route.
208 	 * XXX: perhaps the expire for routing entries cloned by
209 	 * this lookup should be more aggressive (not longer than timeout).
210 	 */
211 	dst = ip6_route_output(net, sk, fl6);
212 	if (dst->error) {
213 		IP6_INC_STATS(net, ip6_dst_idev(dst),
214 			      IPSTATS_MIB_OUTNOROUTES);
215 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
216 		res = true;
217 	} else {
218 		struct rt6_info *rt = (struct rt6_info *)dst;
219 		int tmo = net->ipv6.sysctl.icmpv6_time;
220 		struct inet_peer *peer;
221 
222 		/* Give more bandwidth to wider prefixes. */
223 		if (rt->rt6i_dst.plen < 128)
224 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
225 
226 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
227 		res = inet_peer_xrlim_allow(peer, tmo);
228 		if (peer)
229 			inet_putpeer(peer);
230 	}
231 	dst_release(dst);
232 	return res;
233 }
234 
235 /*
236  *	an inline helper for the "simple" if statement below
237  *	checks if parameter problem report is caused by an
238  *	unrecognized IPv6 option that has the Option Type
239  *	highest-order two bits set to 10
240  */
241 
242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
243 {
244 	u8 _optval, *op;
245 
246 	offset += skb_network_offset(skb);
247 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
248 	if (!op)
249 		return true;
250 	return (*op & 0xC0) == 0x80;
251 }
252 
253 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
254 				struct icmp6hdr *thdr, int len)
255 {
256 	struct sk_buff *skb;
257 	struct icmp6hdr *icmp6h;
258 
259 	skb = skb_peek(&sk->sk_write_queue);
260 	if (!skb)
261 		return;
262 
263 	icmp6h = icmp6_hdr(skb);
264 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
265 	icmp6h->icmp6_cksum = 0;
266 
267 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
268 		skb->csum = csum_partial(icmp6h,
269 					sizeof(struct icmp6hdr), skb->csum);
270 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
271 						      &fl6->daddr,
272 						      len, fl6->flowi6_proto,
273 						      skb->csum);
274 	} else {
275 		__wsum tmp_csum = 0;
276 
277 		skb_queue_walk(&sk->sk_write_queue, skb) {
278 			tmp_csum = csum_add(tmp_csum, skb->csum);
279 		}
280 
281 		tmp_csum = csum_partial(icmp6h,
282 					sizeof(struct icmp6hdr), tmp_csum);
283 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
284 						      &fl6->daddr,
285 						      len, fl6->flowi6_proto,
286 						      tmp_csum);
287 	}
288 	ip6_push_pending_frames(sk);
289 }
290 
291 struct icmpv6_msg {
292 	struct sk_buff	*skb;
293 	int		offset;
294 	uint8_t		type;
295 };
296 
297 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
298 {
299 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
300 	struct sk_buff *org_skb = msg->skb;
301 	__wsum csum = 0;
302 
303 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
304 				      to, len, csum);
305 	skb->csum = csum_block_add(skb->csum, csum, odd);
306 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
307 		nf_ct_attach(skb, org_skb);
308 	return 0;
309 }
310 
311 #if IS_ENABLED(CONFIG_IPV6_MIP6)
312 static void mip6_addr_swap(struct sk_buff *skb)
313 {
314 	struct ipv6hdr *iph = ipv6_hdr(skb);
315 	struct inet6_skb_parm *opt = IP6CB(skb);
316 	struct ipv6_destopt_hao *hao;
317 	struct in6_addr tmp;
318 	int off;
319 
320 	if (opt->dsthao) {
321 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
322 		if (likely(off >= 0)) {
323 			hao = (struct ipv6_destopt_hao *)
324 					(skb_network_header(skb) + off);
325 			tmp = iph->saddr;
326 			iph->saddr = hao->addr;
327 			hao->addr = tmp;
328 		}
329 	}
330 }
331 #else
332 static inline void mip6_addr_swap(struct sk_buff *skb) {}
333 #endif
334 
/*
 * Resolve the output route for the ICMPv6 message described by @fl6.
 *
 * @net: network namespace
 * @skb: packet that triggered the message; used to decode the reverse flow
 * @sk:  ICMPv6 control socket
 * @fl6: flow of the outgoing message
 *
 * Returns a dst entry on success or an ERR_PTR() value on failure.
 * -EINVAL is returned when the destination is a known anycast address.
 */
335 static struct dst_entry *icmpv6_route_lookup(struct net *net,
336 					     struct sk_buff *skb,
337 					     struct sock *sk,
338 					     struct flowi6 *fl6)
339 {
340 	struct dst_entry *dst, *dst2;
341 	struct flowi6 fl2;
342 	int err;
343 
344 	err = ip6_dst_lookup(net, sk, &dst, fl6);
345 	if (err)
346 		return ERR_PTR(err);
347 
348 	/*
349 	 * We won't send icmp if the destination is known
350 	 * anycast.
351 	 */
352 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
353 		net_dbg_ratelimited("icmp6_send: acast source\n");
354 		dst_release(dst);
355 		return ERR_PTR(-EINVAL);
356 	}
357 
358 	/* No need to clone since we're just using its address. */
359 	dst2 = dst;
360 
	/*
	 * First xfrm pass on the forward flow.  A result that differs from
	 * the plain route is returned right away.  -EPERM clears dst and
	 * falls through to the reverse-flow lookup; any other error is
	 * returned to the caller.
	 */
361 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
362 	if (!IS_ERR(dst)) {
363 		if (dst != dst2)
364 			return dst;
365 	} else {
366 		if (PTR_ERR(dst) == -EPERM)
367 			dst = NULL;
368 		else
369 			return dst;
370 	}
371 
	/* Build fl2 from the offending packet, decoded in reverse. */
372 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
373 	if (err)
374 		goto relookup_failed;
375 
376 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
377 	if (err)
378 		goto relookup_failed;
379 
	/*
	 * Second xfrm pass on the reverse flow with XFRM_LOOKUP_ICMP.
	 * On success its result replaces dst; on -EPERM dst is released and
	 * the error pointer is returned; other errors keep the earlier dst.
	 */
380 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
381 	if (!IS_ERR(dst2)) {
382 		dst_release(dst);
383 		dst = dst2;
384 	} else {
385 		err = PTR_ERR(dst2);
386 		if (err == -EPERM) {
387 			dst_release(dst);
388 			return dst2;
389 		} else
390 			goto relookup_failed;
391 	}
392 
	/*
	 * Reached both on fallthrough after a successful relookup and via
	 * goto: return whatever dst is held, else the last error code.
	 */
393 relookup_failed:
394 	if (dst)
395 		return dst;
396 	return ERR_PTR(err);
397 }
398 
399 static int icmp6_iif(const struct sk_buff *skb)
400 {
401 	int iif = skb->dev->ifindex;
402 
403 	/* for local traffic to local address, skb dev is the loopback
404 	 * device. Check if there is a dst attached to the skb and if so
405 	 * get the real device index. Same is needed for replies to a link
406 	 * local address on a device enslaved to an L3 master device
407 	 */
408 	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
409 		const struct rt6_info *rt6 = skb_rt6_info(skb);
410 
411 		if (rt6)
412 			iif = rt6->rt6i_idev->dev->ifindex;
413 	}
414 
415 	return iif;
416 }
417 
418 /*
419  *	Send an ICMP message in response to a packet in error
420  */
421 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
422 		       const struct in6_addr *force_saddr)
423 {
424 	struct net *net = dev_net(skb->dev);
425 	struct inet6_dev *idev = NULL;
426 	struct ipv6hdr *hdr = ipv6_hdr(skb);
427 	struct sock *sk;
428 	struct ipv6_pinfo *np;
429 	const struct in6_addr *saddr = NULL;
430 	struct dst_entry *dst;
431 	struct icmp6hdr tmp_hdr;
432 	struct flowi6 fl6;
433 	struct icmpv6_msg msg;
434 	struct ipcm6_cookie ipc6;
435 	int iif = 0;
436 	int addr_type = 0;
437 	int len;
438 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
439 
440 	if ((u8 *)hdr < skb->head ||
441 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
442 		return;
443 
444 	/*
445 	 *	Make sure we respect the rules
446 	 *	i.e. RFC 1885 2.4(e)
447 	 *	Rule (e.1) is enforced by not using icmp6_send
448 	 *	in any code that processes icmp errors.
449 	 */
450 	addr_type = ipv6_addr_type(&hdr->daddr);
451 
452 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
453 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
454 		saddr = &hdr->daddr;
455 
456 	/*
457 	 *	Dest addr check
458 	 */
459 
460 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
461 		if (type != ICMPV6_PKT_TOOBIG &&
462 		    !(type == ICMPV6_PARAMPROB &&
463 		      code == ICMPV6_UNK_OPTION &&
464 		      (opt_unrec(skb, info))))
465 			return;
466 
467 		saddr = NULL;
468 	}
469 
470 	addr_type = ipv6_addr_type(&hdr->saddr);
471 
472 	/*
473 	 *	Source addr check
474 	 */
475 
476 	if (__ipv6_addr_needs_scope_id(addr_type)) {
477 		iif = icmp6_iif(skb);
478 	} else {
479 		dst = skb_dst(skb);
480 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
481 	}
482 
483 	/*
484 	 *	Must not send error if the source does not uniquely
485 	 *	identify a single node (RFC2463 Section 2.4).
486 	 *	We check unspecified / multicast addresses here,
487 	 *	and anycast addresses will be checked later.
488 	 */
489 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
490 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
491 				    &hdr->saddr, &hdr->daddr);
492 		return;
493 	}
494 
495 	/*
496 	 *	Never answer to a ICMP packet.
497 	 */
498 	if (is_ineligible(skb)) {
499 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
500 				    &hdr->saddr, &hdr->daddr);
501 		return;
502 	}
503 
504 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
505 	local_bh_disable();
506 
507 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
508 	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
509 		goto out_bh_enable;
510 
511 	mip6_addr_swap(skb);
512 
513 	memset(&fl6, 0, sizeof(fl6));
514 	fl6.flowi6_proto = IPPROTO_ICMPV6;
515 	fl6.daddr = hdr->saddr;
516 	if (force_saddr)
517 		saddr = force_saddr;
518 	if (saddr)
519 		fl6.saddr = *saddr;
520 	fl6.flowi6_mark = mark;
521 	fl6.flowi6_oif = iif;
522 	fl6.fl6_icmp_type = type;
523 	fl6.fl6_icmp_code = code;
524 	fl6.flowi6_uid = sock_net_uid(net, NULL);
525 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
526 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
527 
528 	sk = icmpv6_xmit_lock(net);
529 	if (!sk)
530 		goto out_bh_enable;
531 
532 	sk->sk_mark = mark;
533 	np = inet6_sk(sk);
534 
535 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
536 		goto out;
537 
538 	tmp_hdr.icmp6_type = type;
539 	tmp_hdr.icmp6_code = code;
540 	tmp_hdr.icmp6_cksum = 0;
541 	tmp_hdr.icmp6_pointer = htonl(info);
542 
543 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
544 		fl6.flowi6_oif = np->mcast_oif;
545 	else if (!fl6.flowi6_oif)
546 		fl6.flowi6_oif = np->ucast_oif;
547 
548 	ipcm6_init_sk(&ipc6, np);
549 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
550 
551 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
552 	if (IS_ERR(dst))
553 		goto out;
554 
555 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
556 
557 	msg.skb = skb;
558 	msg.offset = skb_network_offset(skb);
559 	msg.type = type;
560 
561 	len = skb->len - msg.offset;
562 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
563 	if (len < 0) {
564 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
565 				    &hdr->saddr, &hdr->daddr);
566 		goto out_dst_release;
567 	}
568 
569 	rcu_read_lock();
570 	idev = __in6_dev_get(skb->dev);
571 
572 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
573 			    len + sizeof(struct icmp6hdr),
574 			    sizeof(struct icmp6hdr),
575 			    &ipc6, &fl6, (struct rt6_info *)dst,
576 			    MSG_DONTWAIT)) {
577 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
578 		ip6_flush_pending_frames(sk);
579 	} else {
580 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
581 					   len + sizeof(struct icmp6hdr));
582 	}
583 	rcu_read_unlock();
584 out_dst_release:
585 	dst_release(dst);
586 out:
587 	icmpv6_xmit_unlock(sk);
588 out_bh_enable:
589 	local_bh_enable();
590 }
591 
592 /* Slightly more convenient version of icmp6_send.
593  */
594 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
595 {
596 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
597 	kfree_skb(skb);
598 }
599 
600 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
601  * if sufficient data bytes are available
602  * @nhs is the size of the tunnel header(s) :
603  *  Either an IPv4 header for SIT encap
604  *         an IPv4 header + GRE header for GRE encap
605  */
606 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
607 			       unsigned int data_len)
608 {
609 	struct in6_addr temp_saddr;
610 	struct rt6_info *rt;
611 	struct sk_buff *skb2;
612 	u32 info = 0;
613 
614 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
615 		return 1;
616 
617 	/* RFC 4884 (partial) support for ICMP extensions */
618 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
619 		data_len = 0;
620 
621 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
622 
623 	if (!skb2)
624 		return 1;
625 
626 	skb_dst_drop(skb2);
627 	skb_pull(skb2, nhs);
628 	skb_reset_network_header(skb2);
629 
630 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
631 			skb, 0);
632 
633 	if (rt && rt->dst.dev)
634 		skb2->dev = rt->dst.dev;
635 
636 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
637 
638 	if (data_len) {
639 		/* RFC 4884 (partial) support :
640 		 * insert 0 padding at the end, before the extensions
641 		 */
642 		__skb_push(skb2, nhs);
643 		skb_reset_network_header(skb2);
644 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
645 		memset(skb2->data + data_len - nhs, 0, nhs);
646 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
647 		 * and stored in reserved[0]
648 		 */
649 		info = (data_len/8) << 24;
650 	}
651 	if (type == ICMP_TIME_EXCEEDED)
652 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
653 			   info, &temp_saddr);
654 	else
655 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
656 			   info, &temp_saddr);
657 	if (rt)
658 		ip6_rt_put(rt);
659 
660 	kfree_skb(skb2);
661 
662 	return 0;
663 }
664 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
665 
/*
 * Answer the ICMPv6 Echo Request in @skb with an Echo Reply.
 *
 * The caller (icmpv6_rcv) has already pulled the ICMPv6 header, so the
 * reply payload is copied from skb->data onwards (msg.offset = 0) and
 * the reply header is a copy of the request header with the type
 * changed.  @skb itself is not freed here.
 */
666 static void icmpv6_echo_reply(struct sk_buff *skb)
667 {
668 	struct net *net = dev_net(skb->dev);
669 	struct sock *sk;
670 	struct inet6_dev *idev;
671 	struct ipv6_pinfo *np;
672 	const struct in6_addr *saddr = NULL;
673 	struct icmp6hdr *icmph = icmp6_hdr(skb);
674 	struct icmp6hdr tmp_hdr;
675 	struct flowi6 fl6;
676 	struct icmpv6_msg msg;
677 	struct dst_entry *dst;
678 	struct ipcm6_cookie ipc6;
679 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
680 
	/* By default reply from the address the request was sent to. */
681 	saddr = &ipv6_hdr(skb)->daddr;
682 
	/*
	 * Leave the source unset unless the request had a unicast
	 * destination, or the anycast_src_echo_reply sysctl is on and the
	 * destination is anycast.
	 */
683 	if (!ipv6_unicast_destination(skb) &&
684 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
685 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
686 		saddr = NULL;
687 
	/* Reply header = request header with the type replaced. */
688 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
689 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
690 
691 	memset(&fl6, 0, sizeof(fl6));
692 	fl6.flowi6_proto = IPPROTO_ICMPV6;
693 	fl6.daddr = ipv6_hdr(skb)->saddr;
694 	if (saddr)
695 		fl6.saddr = *saddr;
696 	fl6.flowi6_oif = icmp6_iif(skb);
697 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
698 	fl6.flowi6_mark = mark;
699 	fl6.flowi6_uid = sock_net_uid(net, NULL);
700 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
701 
	/* icmpv6_xmit_lock() must be called with BH disabled. */
702 	local_bh_disable();
703 	sk = icmpv6_xmit_lock(net);
704 	if (!sk)
705 		goto out_bh_enable;
706 	sk->sk_mark = mark;
707 	np = inet6_sk(sk);
708 
	/* No interface chosen yet: fall back to the socket's defaults. */
709 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
710 		fl6.flowi6_oif = np->mcast_oif;
711 	else if (!fl6.flowi6_oif)
712 		fl6.flowi6_oif = np->ucast_oif;
713 
714 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
715 		goto out;
716 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
717 	if (IS_ERR(dst))
718 		goto out;
719 
720 	idev = __in6_dev_get(skb->dev);
721 
722 	msg.skb = skb;
723 	msg.offset = 0;
724 	msg.type = ICMPV6_ECHO_REPLY;
725 
	/* Hop limit comes from the route; traffic class echoes the request. */
726 	ipcm6_init_sk(&ipc6, np);
727 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
728 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
729 
	/* Queue payload + header room; on failure count it and drop. */
730 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
731 			    skb->len + sizeof(struct icmp6hdr),
732 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
733 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
734 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
735 		ip6_flush_pending_frames(sk);
736 	} else {
737 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
738 					   skb->len + sizeof(struct icmp6hdr));
739 	}
740 	dst_release(dst);
741 out:
742 	icmpv6_xmit_unlock(sk);
743 out_bh_enable:
744 	local_bh_enable();
745 }
746 
747 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
748 {
749 	const struct inet6_protocol *ipprot;
750 	int inner_offset;
751 	__be16 frag_off;
752 	u8 nexthdr;
753 	struct net *net = dev_net(skb->dev);
754 
755 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
756 		goto out;
757 
758 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
759 	if (ipv6_ext_hdr(nexthdr)) {
760 		/* now skip over extension headers */
761 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
762 						&nexthdr, &frag_off);
763 		if (inner_offset < 0)
764 			goto out;
765 	} else {
766 		inner_offset = sizeof(struct ipv6hdr);
767 	}
768 
769 	/* Checkin header including 8 bytes of inner protocol header. */
770 	if (!pskb_may_pull(skb, inner_offset+8))
771 		goto out;
772 
773 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
774 	   Without this we will not able f.e. to make source routed
775 	   pmtu discovery.
776 	   Corresponding argument (opt) to notifiers is already added.
777 	   --ANK (980726)
778 	 */
779 
780 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
781 	if (ipprot && ipprot->err_handler)
782 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
783 
784 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
785 	return;
786 
787 out:
788 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
789 }
790 
791 /*
792  *	Handle icmp messages
793  */
794 
/*
 * Receive handler registered in icmpv6_protocol.
 *
 * Validates policy and checksum, updates the ICMPv6 MIB counters and
 * dispatches on the message type.  The skb is always released here
 * (consume_skb() when ping_rcv() reported success, kfree_skb()
 * otherwise) and the function always returns 0.
 */
795 static int icmpv6_rcv(struct sk_buff *skb)
796 {
797 	struct net *net = dev_net(skb->dev);
798 	struct net_device *dev = skb->dev;
799 	struct inet6_dev *idev = __in6_dev_get(dev);
800 	const struct in6_addr *saddr, *daddr;
801 	struct icmp6hdr *hdr;
802 	u8 type;
803 	bool success = false;
804 
	/*
	 * Inbound policy check failed.  The packet is still accepted if
	 * the last state in its security path carries XFRM_STATE_ICMP and
	 * the packet quoted after the ICMPv6 header passes the reverse
	 * policy check; the network header is moved onto the quoted
	 * packet for that check and restored afterwards.
	 */
805 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
806 		struct sec_path *sp = skb_sec_path(skb);
807 		int nh;
808 
809 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
810 				 XFRM_STATE_ICMP))
811 			goto drop_no_count;
812 
813 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
814 			goto drop_no_count;
815 
816 		nh = skb_network_offset(skb);
817 		skb_set_network_header(skb, sizeof(*hdr));
818 
819 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
820 			goto drop_no_count;
821 
822 		skb_set_network_header(skb, nh);
823 	}
824 
825 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
826 
827 	saddr = &ipv6_hdr(skb)->saddr;
828 	daddr = &ipv6_hdr(skb)->daddr;
829 
830 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
831 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
832 				    saddr, daddr);
833 		goto csum_error;
834 	}
835 
	/* Advance skb->data past the ICMPv6 header. */
836 	if (!pskb_pull(skb, sizeof(*hdr)))
837 		goto discard_it;
838 
	/* NOTE(review): icmp6_hdr() presumably still locates the pulled header via the transport header offset - confirm. */
839 	hdr = icmp6_hdr(skb);
840 
841 	type = hdr->icmp6_type;
842 
843 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
844 
845 	switch (type) {
846 	case ICMPV6_ECHO_REQUEST:
		/* Replies can be disabled through the echo_ignore_all sysctl. */
847 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
848 			icmpv6_echo_reply(skb);
849 		break;
850 
851 	case ICMPV6_ECHO_REPLY:
		/* Only this path can set success, selecting consume_skb() below. */
852 		success = ping_rcv(skb);
853 		break;
854 
855 	case ICMPV6_PKT_TOOBIG:
856 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
857 		   standard destination cache. Seems, only "advanced"
858 		   destination cache will allow to solve this problem
859 		   --ANK (980726)
860 		 */
861 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
862 			goto discard_it;
		/* Re-read the header pointer after the pskb_may_pull() above. */
863 		hdr = icmp6_hdr(skb);
864 
865 		/* to notify */
866 		/* fall through */
867 	case ICMPV6_DEST_UNREACH:
868 	case ICMPV6_TIME_EXCEED:
869 	case ICMPV6_PARAMPROB:
870 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
871 		break;
872 
873 	case NDISC_ROUTER_SOLICITATION:
874 	case NDISC_ROUTER_ADVERTISEMENT:
875 	case NDISC_NEIGHBOUR_SOLICITATION:
876 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
877 	case NDISC_REDIRECT:
878 		ndisc_rcv(skb);
879 		break;
880 
881 	case ICMPV6_MGM_QUERY:
882 		igmp6_event_query(skb);
883 		break;
884 
885 	case ICMPV6_MGM_REPORT:
886 		igmp6_event_report(skb);
887 		break;
888 
	/* Known types that need no processing in this function. */
889 	case ICMPV6_MGM_REDUCTION:
890 	case ICMPV6_NI_QUERY:
891 	case ICMPV6_NI_REPLY:
892 	case ICMPV6_MLD2_REPORT:
893 	case ICMPV6_DHAAD_REQUEST:
894 	case ICMPV6_DHAAD_REPLY:
895 	case ICMPV6_MOBILE_PREFIX_SOL:
896 	case ICMPV6_MOBILE_PREFIX_ADV:
897 		break;
898 
899 	default:
900 		/* informational */
901 		if (type & ICMPV6_INFOMSG_MASK)
902 			break;
903 
904 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
905 				    saddr, daddr);
906 
907 		/*
908 		 * error of unknown type.
909 		 * must pass to upper level
910 		 */
911 
912 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
913 	}
914 
915 	/* until the v6 path can be better sorted assume failure and
916 	 * preserve the status quo behaviour for the rest of the paths to here
917 	 */
918 	if (success)
919 		consume_skb(skb);
920 	else
921 		kfree_skb(skb);
922 
923 	return 0;
924 
	/*
	 * The labels chain: a checksum error also counts as an input
	 * error, and every failure path ends by freeing the skb.
	 */
925 csum_error:
926 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
927 discard_it:
928 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
929 drop_no_count:
930 	kfree_skb(skb);
931 	return 0;
932 }
933 
934 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
935 		      u8 type,
936 		      const struct in6_addr *saddr,
937 		      const struct in6_addr *daddr,
938 		      int oif)
939 {
940 	memset(fl6, 0, sizeof(*fl6));
941 	fl6->saddr = *saddr;
942 	fl6->daddr = *daddr;
943 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
944 	fl6->fl6_icmp_type	= type;
945 	fl6->fl6_icmp_code	= 0;
946 	fl6->flowi6_oif		= oif;
947 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
948 }
949 
950 static int __net_init icmpv6_sk_init(struct net *net)
951 {
952 	struct sock *sk;
953 	int err, i, j;
954 
955 	net->ipv6.icmp_sk =
956 		kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
957 	if (!net->ipv6.icmp_sk)
958 		return -ENOMEM;
959 
960 	for_each_possible_cpu(i) {
961 		err = inet_ctl_sock_create(&sk, PF_INET6,
962 					   SOCK_RAW, IPPROTO_ICMPV6, net);
963 		if (err < 0) {
964 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
965 			       err);
966 			goto fail;
967 		}
968 
969 		net->ipv6.icmp_sk[i] = sk;
970 
971 		/* Enough space for 2 64K ICMP packets, including
972 		 * sk_buff struct overhead.
973 		 */
974 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
975 	}
976 	return 0;
977 
978  fail:
979 	for (j = 0; j < i; j++)
980 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
981 	kfree(net->ipv6.icmp_sk);
982 	return err;
983 }
984 
985 static void __net_exit icmpv6_sk_exit(struct net *net)
986 {
987 	int i;
988 
989 	for_each_possible_cpu(i) {
990 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
991 	}
992 	kfree(net->ipv6.icmp_sk);
993 }
994 
995 static struct pernet_operations icmpv6_sk_ops = {
996 	.init = icmpv6_sk_init,
997 	.exit = icmpv6_sk_exit,
998 };
999 
1000 int __init icmpv6_init(void)
1001 {
1002 	int err;
1003 
1004 	err = register_pernet_subsys(&icmpv6_sk_ops);
1005 	if (err < 0)
1006 		return err;
1007 
1008 	err = -EAGAIN;
1009 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1010 		goto fail;
1011 
1012 	err = inet6_register_icmp_sender(icmp6_send);
1013 	if (err)
1014 		goto sender_reg_err;
1015 	return 0;
1016 
1017 sender_reg_err:
1018 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1019 fail:
1020 	pr_err("Failed to register ICMP6 protocol\n");
1021 	unregister_pernet_subsys(&icmpv6_sk_ops);
1022 	return err;
1023 }
1024 
1025 void icmpv6_cleanup(void)
1026 {
1027 	inet6_unregister_icmp_sender(icmp6_send);
1028 	unregister_pernet_subsys(&icmpv6_sk_ops);
1029 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1030 }
1031 
1032 
/*
 * Translation table for ICMPV6_DEST_UNREACH, indexed by ICMPv6 code:
 * the errno to report and whether the error is fatal.  Used by
 * icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	[0] = { .err = ENETUNREACH,  .fatal = 0 },	/* NOROUTE */
	[1] = { .err = EACCES,       .fatal = 1 },	/* ADM_PROHIBITED */
	[2] = { .err = EHOSTUNREACH, .fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	[3] = { .err = EHOSTUNREACH, .fatal = 0 },	/* ADDR_UNREACH */
	[4] = { .err = ECONNREFUSED, .fatal = 1 },	/* PORT_UNREACH */
	[5] = { .err = EACCES,       .fatal = 1 },	/* POLICY_FAIL */
	[6] = { .err = EACCES,       .fatal = 1 },	/* REJECT_ROUTE */
};
1066 
1067 int icmpv6_err_convert(u8 type, u8 code, int *err)
1068 {
1069 	int fatal = 0;
1070 
1071 	*err = EPROTO;
1072 
1073 	switch (type) {
1074 	case ICMPV6_DEST_UNREACH:
1075 		fatal = 1;
1076 		if (code < ARRAY_SIZE(tab_unreach)) {
1077 			*err  = tab_unreach[code].err;
1078 			fatal = tab_unreach[code].fatal;
1079 		}
1080 		break;
1081 
1082 	case ICMPV6_PKT_TOOBIG:
1083 		*err = EMSGSIZE;
1084 		break;
1085 
1086 	case ICMPV6_PARAMPROB:
1087 		*err = EPROTO;
1088 		fatal = 1;
1089 		break;
1090 
1091 	case ICMPV6_TIME_EXCEED:
1092 		*err = EHOSTUNREACH;
1093 		break;
1094 	}
1095 
1096 	return fatal;
1097 }
1098 EXPORT_SYMBOL(icmpv6_err_convert);
1099 
1100 #ifdef CONFIG_SYSCTL
1101 static struct ctl_table ipv6_icmp_table_template[] = {
1102 	{
1103 		.procname	= "ratelimit",
1104 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1105 		.maxlen		= sizeof(int),
1106 		.mode		= 0644,
1107 		.proc_handler	= proc_dointvec_ms_jiffies,
1108 	},
1109 	{
1110 		.procname	= "echo_ignore_all",
1111 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1112 		.maxlen		= sizeof(int),
1113 		.mode		= 0644,
1114 		.proc_handler = proc_dointvec,
1115 	},
1116 	{ },
1117 };
1118 
1119 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1120 {
1121 	struct ctl_table *table;
1122 
1123 	table = kmemdup(ipv6_icmp_table_template,
1124 			sizeof(ipv6_icmp_table_template),
1125 			GFP_KERNEL);
1126 
1127 	if (table) {
1128 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1129 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1130 	}
1131 	return table;
1132 }
1133 #endif
1134