xref: /openbmc/linux/net/ipv6/icmp.c (revision df687341)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 
104 	return 0;
105 }
106 
107 static int icmpv6_rcv(struct sk_buff *skb);
108 
109 static const struct inet6_protocol icmpv6_protocol = {
110 	.handler	=	icmpv6_rcv,
111 	.err_handler	=	icmpv6_err,
112 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
113 };
114 
115 /* Called with BH disabled */
116 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
117 {
118 	struct sock *sk;
119 
120 	sk = icmpv6_sk(net);
121 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
122 		/* This can happen if the output path (f.e. SIT or
123 		 * ip6ip6 tunnel) signals dst_link_failure() for an
124 		 * outgoing ICMP6 packet.
125 		 */
126 		return NULL;
127 	}
128 	return sk;
129 }
130 
131 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
132 {
133 	spin_unlock(&sk->sk_lock.slock);
134 }
135 
136 /*
137  * Figure out, may we reply to this packet with icmp error.
138  *
139  * We do not reply, if:
140  *	- it was icmp error message.
141  *	- it is truncated, so that it is known, that protocol is ICMPV6
142  *	  (i.e. in the middle of some exthdr)
143  *
144  *	--ANK (980726)
145  */
146 
147 static bool is_ineligible(const struct sk_buff *skb)
148 {
149 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
150 	int len = skb->len - ptr;
151 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
152 	__be16 frag_off;
153 
154 	if (len < 0)
155 		return true;
156 
157 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
158 	if (ptr < 0)
159 		return false;
160 	if (nexthdr == IPPROTO_ICMPV6) {
161 		u8 _type, *tp;
162 		tp = skb_header_pointer(skb,
163 			ptr+offsetof(struct icmp6hdr, icmp6_type),
164 			sizeof(_type), &_type);
165 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
166 			return true;
167 	}
168 	return false;
169 }
170 
/*
 * Return true for ICMPv6 types that are exempt from rate limiting:
 * informational messages are not limited, and Packet Too Big is not
 * limited either because that would break pmtu discovery.
 */
static bool icmpv6_mask_allow(int type)
{
	return (type & ICMPV6_INFOMSG_MASK) || type == ICMPV6_PKT_TOOBIG;
}
183 
184 static bool icmpv6_global_allow(int type)
185 {
186 	if (icmpv6_mask_allow(type))
187 		return true;
188 
189 	if (icmp_global_allow())
190 		return true;
191 
192 	return false;
193 }
194 
195 /*
196  * Check the ICMP output rate limit
197  */
198 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
199 			       struct flowi6 *fl6)
200 {
201 	struct net *net = sock_net(sk);
202 	struct dst_entry *dst;
203 	bool res = false;
204 
205 	if (icmpv6_mask_allow(type))
206 		return true;
207 
208 	/*
209 	 * Look up the output route.
210 	 * XXX: perhaps the expire for routing entries cloned by
211 	 * this lookup should be more aggressive (not longer than timeout).
212 	 */
213 	dst = ip6_route_output(net, sk, fl6);
214 	if (dst->error) {
215 		IP6_INC_STATS(net, ip6_dst_idev(dst),
216 			      IPSTATS_MIB_OUTNOROUTES);
217 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
218 		res = true;
219 	} else {
220 		struct rt6_info *rt = (struct rt6_info *)dst;
221 		int tmo = net->ipv6.sysctl.icmpv6_time;
222 		struct inet_peer *peer;
223 
224 		/* Give more bandwidth to wider prefixes. */
225 		if (rt->rt6i_dst.plen < 128)
226 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
227 
228 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
229 		res = inet_peer_xrlim_allow(peer, tmo);
230 		if (peer)
231 			inet_putpeer(peer);
232 	}
233 	dst_release(dst);
234 	return res;
235 }
236 
237 /*
238  *	an inline helper for the "simple" if statement below
239  *	checks if parameter problem report is caused by an
240  *	unrecognized IPv6 option that has the Option Type
241  *	highest-order two bits set to 10
242  */
243 
244 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
245 {
246 	u8 _optval, *op;
247 
248 	offset += skb_network_offset(skb);
249 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
250 	if (!op)
251 		return true;
252 	return (*op & 0xC0) == 0x80;
253 }
254 
255 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
256 				struct icmp6hdr *thdr, int len)
257 {
258 	struct sk_buff *skb;
259 	struct icmp6hdr *icmp6h;
260 
261 	skb = skb_peek(&sk->sk_write_queue);
262 	if (!skb)
263 		return;
264 
265 	icmp6h = icmp6_hdr(skb);
266 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
267 	icmp6h->icmp6_cksum = 0;
268 
269 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
270 		skb->csum = csum_partial(icmp6h,
271 					sizeof(struct icmp6hdr), skb->csum);
272 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
273 						      &fl6->daddr,
274 						      len, fl6->flowi6_proto,
275 						      skb->csum);
276 	} else {
277 		__wsum tmp_csum = 0;
278 
279 		skb_queue_walk(&sk->sk_write_queue, skb) {
280 			tmp_csum = csum_add(tmp_csum, skb->csum);
281 		}
282 
283 		tmp_csum = csum_partial(icmp6h,
284 					sizeof(struct icmp6hdr), tmp_csum);
285 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
286 						      &fl6->daddr,
287 						      len, fl6->flowi6_proto,
288 						      tmp_csum);
289 	}
290 	ip6_push_pending_frames(sk);
291 }
292 
/* Context handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
	struct sk_buff	*skb;		/* packet the payload is copied from */
	int		offset;		/* start of the data to copy within skb */
	uint8_t		type;		/* ICMPv6 type of the message being built */
};
298 
299 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
300 {
301 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
302 	struct sk_buff *org_skb = msg->skb;
303 	__wsum csum = 0;
304 
305 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
306 				      to, len, csum);
307 	skb->csum = csum_block_add(skb->csum, csum, odd);
308 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
309 		nf_ct_attach(skb, org_skb);
310 	return 0;
311 }
312 
313 #if IS_ENABLED(CONFIG_IPV6_MIP6)
314 static void mip6_addr_swap(struct sk_buff *skb)
315 {
316 	struct ipv6hdr *iph = ipv6_hdr(skb);
317 	struct inet6_skb_parm *opt = IP6CB(skb);
318 	struct ipv6_destopt_hao *hao;
319 	struct in6_addr tmp;
320 	int off;
321 
322 	if (opt->dsthao) {
323 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
324 		if (likely(off >= 0)) {
325 			hao = (struct ipv6_destopt_hao *)
326 					(skb_network_header(skb) + off);
327 			tmp = iph->saddr;
328 			iph->saddr = hao->addr;
329 			hao->addr = tmp;
330 		}
331 	}
332 }
333 #else
334 static inline void mip6_addr_swap(struct sk_buff *skb) {}
335 #endif
336 
/*
 * Find the route used to send an ICMPv6 error.
 *
 * @net: network namespace
 * @skb: the offending packet; used to decode the reverse flow
 * @sk:  ICMPv6 socket the message is sent from
 * @fl6: flow of the outgoing ICMPv6 message
 *
 * Returns a dst entry on success or an ERR_PTR() on failure. An anycast
 * destination yields ERR_PTR(-EINVAL).
 *
 * The lookup is done in two stages: first for @fl6 itself and, when
 * xfrm_lookup() hands back the same dst or fails with -EPERM, a second
 * time for the flow decoded in reverse from @skb with XFRM_LOOKUP_ICMP.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		/* A dst different from the plain route is used as is. */
		if (dst != dst2)
			return dst;
	} else {
		/* -EPERM still allows the reverse lookup below; other errors are final. */
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	/* Second stage: build the flow of the offending packet, reversed. */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		/* The reverse lookup succeeded: it replaces the first result. */
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			/* -EPERM from the reverse lookup is returned to the caller. */
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Fall back to the first-stage dst when there is one. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
400 
401 static int icmp6_iif(const struct sk_buff *skb)
402 {
403 	int iif = skb->dev->ifindex;
404 
405 	/* for local traffic to local address, skb dev is the loopback
406 	 * device. Check if there is a dst attached to the skb and if so
407 	 * get the real device index. Same is needed for replies to a link
408 	 * local address on a device enslaved to an L3 master device
409 	 */
410 	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
411 		const struct rt6_info *rt6 = skb_rt6_info(skb);
412 
413 		if (rt6)
414 			iif = rt6->rt6i_idev->dev->ifindex;
415 	}
416 
417 	return iif;
418 }
419 
420 /*
421  *	Send an ICMP message in response to a packet in error
422  */
423 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
424 		       const struct in6_addr *force_saddr)
425 {
426 	struct net *net = dev_net(skb->dev);
427 	struct inet6_dev *idev = NULL;
428 	struct ipv6hdr *hdr = ipv6_hdr(skb);
429 	struct sock *sk;
430 	struct ipv6_pinfo *np;
431 	const struct in6_addr *saddr = NULL;
432 	struct dst_entry *dst;
433 	struct icmp6hdr tmp_hdr;
434 	struct flowi6 fl6;
435 	struct icmpv6_msg msg;
436 	struct ipcm6_cookie ipc6;
437 	int iif = 0;
438 	int addr_type = 0;
439 	int len;
440 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
441 
442 	if ((u8 *)hdr < skb->head ||
443 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
444 		return;
445 
446 	/*
447 	 *	Make sure we respect the rules
448 	 *	i.e. RFC 1885 2.4(e)
449 	 *	Rule (e.1) is enforced by not using icmp6_send
450 	 *	in any code that processes icmp errors.
451 	 */
452 	addr_type = ipv6_addr_type(&hdr->daddr);
453 
454 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
455 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
456 		saddr = &hdr->daddr;
457 
458 	/*
459 	 *	Dest addr check
460 	 */
461 
462 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
463 		if (type != ICMPV6_PKT_TOOBIG &&
464 		    !(type == ICMPV6_PARAMPROB &&
465 		      code == ICMPV6_UNK_OPTION &&
466 		      (opt_unrec(skb, info))))
467 			return;
468 
469 		saddr = NULL;
470 	}
471 
472 	addr_type = ipv6_addr_type(&hdr->saddr);
473 
474 	/*
475 	 *	Source addr check
476 	 */
477 
478 	if (__ipv6_addr_needs_scope_id(addr_type)) {
479 		iif = icmp6_iif(skb);
480 	} else {
481 		dst = skb_dst(skb);
482 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
483 	}
484 
485 	/*
486 	 *	Must not send error if the source does not uniquely
487 	 *	identify a single node (RFC2463 Section 2.4).
488 	 *	We check unspecified / multicast addresses here,
489 	 *	and anycast addresses will be checked later.
490 	 */
491 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
492 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
493 				    &hdr->saddr, &hdr->daddr);
494 		return;
495 	}
496 
497 	/*
498 	 *	Never answer to a ICMP packet.
499 	 */
500 	if (is_ineligible(skb)) {
501 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
502 				    &hdr->saddr, &hdr->daddr);
503 		return;
504 	}
505 
506 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
507 	local_bh_disable();
508 
509 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
510 	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
511 		goto out_bh_enable;
512 
513 	mip6_addr_swap(skb);
514 
515 	memset(&fl6, 0, sizeof(fl6));
516 	fl6.flowi6_proto = IPPROTO_ICMPV6;
517 	fl6.daddr = hdr->saddr;
518 	if (force_saddr)
519 		saddr = force_saddr;
520 	if (saddr)
521 		fl6.saddr = *saddr;
522 	fl6.flowi6_mark = mark;
523 	fl6.flowi6_oif = iif;
524 	fl6.fl6_icmp_type = type;
525 	fl6.fl6_icmp_code = code;
526 	fl6.flowi6_uid = sock_net_uid(net, NULL);
527 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
528 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
529 
530 	sk = icmpv6_xmit_lock(net);
531 	if (!sk)
532 		goto out_bh_enable;
533 
534 	sk->sk_mark = mark;
535 	np = inet6_sk(sk);
536 
537 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
538 		goto out;
539 
540 	tmp_hdr.icmp6_type = type;
541 	tmp_hdr.icmp6_code = code;
542 	tmp_hdr.icmp6_cksum = 0;
543 	tmp_hdr.icmp6_pointer = htonl(info);
544 
545 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
546 		fl6.flowi6_oif = np->mcast_oif;
547 	else if (!fl6.flowi6_oif)
548 		fl6.flowi6_oif = np->ucast_oif;
549 
550 	ipcm6_init_sk(&ipc6, np);
551 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
552 
553 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
554 	if (IS_ERR(dst))
555 		goto out;
556 
557 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
558 
559 	msg.skb = skb;
560 	msg.offset = skb_network_offset(skb);
561 	msg.type = type;
562 
563 	len = skb->len - msg.offset;
564 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
565 	if (len < 0) {
566 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
567 				    &hdr->saddr, &hdr->daddr);
568 		goto out_dst_release;
569 	}
570 
571 	rcu_read_lock();
572 	idev = __in6_dev_get(skb->dev);
573 
574 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
575 			    len + sizeof(struct icmp6hdr),
576 			    sizeof(struct icmp6hdr),
577 			    &ipc6, &fl6, (struct rt6_info *)dst,
578 			    MSG_DONTWAIT)) {
579 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
580 		ip6_flush_pending_frames(sk);
581 	} else {
582 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
583 					   len + sizeof(struct icmp6hdr));
584 	}
585 	rcu_read_unlock();
586 out_dst_release:
587 	dst_release(dst);
588 out:
589 	icmpv6_xmit_unlock(sk);
590 out_bh_enable:
591 	local_bh_enable();
592 }
593 
/* Slightly more convenient version of icmp6_send.
 *
 * Sends an ICMPV6_PARAMPROB message with the given @code for @skb, passing
 * @pos as the info value and no forced source address, and then frees
 * @skb: the caller must not touch @skb afterwards.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}
601 
602 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
603  * if sufficient data bytes are available
604  * @nhs is the size of the tunnel header(s) :
605  *  Either an IPv4 header for SIT encap
606  *         an IPv4 header + GRE header for GRE encap
607  */
608 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
609 			       unsigned int data_len)
610 {
611 	struct in6_addr temp_saddr;
612 	struct rt6_info *rt;
613 	struct sk_buff *skb2;
614 	u32 info = 0;
615 
616 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
617 		return 1;
618 
619 	/* RFC 4884 (partial) support for ICMP extensions */
620 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
621 		data_len = 0;
622 
623 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
624 
625 	if (!skb2)
626 		return 1;
627 
628 	skb_dst_drop(skb2);
629 	skb_pull(skb2, nhs);
630 	skb_reset_network_header(skb2);
631 
632 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
633 			skb, 0);
634 
635 	if (rt && rt->dst.dev)
636 		skb2->dev = rt->dst.dev;
637 
638 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
639 
640 	if (data_len) {
641 		/* RFC 4884 (partial) support :
642 		 * insert 0 padding at the end, before the extensions
643 		 */
644 		__skb_push(skb2, nhs);
645 		skb_reset_network_header(skb2);
646 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
647 		memset(skb2->data + data_len - nhs, 0, nhs);
648 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
649 		 * and stored in reserved[0]
650 		 */
651 		info = (data_len/8) << 24;
652 	}
653 	if (type == ICMP_TIME_EXCEEDED)
654 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
655 			   info, &temp_saddr);
656 	else
657 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
658 			   info, &temp_saddr);
659 	if (rt)
660 		ip6_rt_put(rt);
661 
662 	kfree_skb(skb2);
663 
664 	return 0;
665 }
666 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
667 
668 static void icmpv6_echo_reply(struct sk_buff *skb)
669 {
670 	struct net *net = dev_net(skb->dev);
671 	struct sock *sk;
672 	struct inet6_dev *idev;
673 	struct ipv6_pinfo *np;
674 	const struct in6_addr *saddr = NULL;
675 	struct icmp6hdr *icmph = icmp6_hdr(skb);
676 	struct icmp6hdr tmp_hdr;
677 	struct flowi6 fl6;
678 	struct icmpv6_msg msg;
679 	struct dst_entry *dst;
680 	struct ipcm6_cookie ipc6;
681 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
682 
683 	saddr = &ipv6_hdr(skb)->daddr;
684 
685 	if (!ipv6_unicast_destination(skb) &&
686 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
687 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
688 		saddr = NULL;
689 
690 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
691 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
692 
693 	memset(&fl6, 0, sizeof(fl6));
694 	fl6.flowi6_proto = IPPROTO_ICMPV6;
695 	fl6.daddr = ipv6_hdr(skb)->saddr;
696 	if (saddr)
697 		fl6.saddr = *saddr;
698 	fl6.flowi6_oif = icmp6_iif(skb);
699 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
700 	fl6.flowi6_mark = mark;
701 	fl6.flowi6_uid = sock_net_uid(net, NULL);
702 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
703 
704 	local_bh_disable();
705 	sk = icmpv6_xmit_lock(net);
706 	if (!sk)
707 		goto out_bh_enable;
708 	sk->sk_mark = mark;
709 	np = inet6_sk(sk);
710 
711 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
712 		fl6.flowi6_oif = np->mcast_oif;
713 	else if (!fl6.flowi6_oif)
714 		fl6.flowi6_oif = np->ucast_oif;
715 
716 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
717 		goto out;
718 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
719 	if (IS_ERR(dst))
720 		goto out;
721 
722 	idev = __in6_dev_get(skb->dev);
723 
724 	msg.skb = skb;
725 	msg.offset = 0;
726 	msg.type = ICMPV6_ECHO_REPLY;
727 
728 	ipcm6_init_sk(&ipc6, np);
729 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
730 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
731 
732 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
733 			    skb->len + sizeof(struct icmp6hdr),
734 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
735 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
736 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
737 		ip6_flush_pending_frames(sk);
738 	} else {
739 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
740 					   skb->len + sizeof(struct icmp6hdr));
741 	}
742 	dst_release(dst);
743 out:
744 	icmpv6_xmit_unlock(sk);
745 out_bh_enable:
746 	local_bh_enable();
747 }
748 
749 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
750 {
751 	const struct inet6_protocol *ipprot;
752 	int inner_offset;
753 	__be16 frag_off;
754 	u8 nexthdr;
755 	struct net *net = dev_net(skb->dev);
756 
757 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
758 		goto out;
759 
760 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
761 	if (ipv6_ext_hdr(nexthdr)) {
762 		/* now skip over extension headers */
763 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
764 						&nexthdr, &frag_off);
765 		if (inner_offset < 0)
766 			goto out;
767 	} else {
768 		inner_offset = sizeof(struct ipv6hdr);
769 	}
770 
771 	/* Checkin header including 8 bytes of inner protocol header. */
772 	if (!pskb_may_pull(skb, inner_offset+8))
773 		goto out;
774 
775 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
776 	   Without this we will not able f.e. to make source routed
777 	   pmtu discovery.
778 	   Corresponding argument (opt) to notifiers is already added.
779 	   --ANK (980726)
780 	 */
781 
782 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
783 	if (ipprot && ipprot->err_handler)
784 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
785 
786 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
787 	return;
788 
789 out:
790 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
791 }
792 
/*
 *	Handle icmp messages
 *
 *	Receive handler registered in icmpv6_protocol. Validates the
 *	message (xfrm policy, checksum), updates the ICMPv6 statistics and
 *	dispatches on the message type. The skb is always released here
 *	and the function always returns 0.
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = skb->dev;
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/*
		 * The policy check failed: go on only if the last state of
		 * the sec path carries XFRM_STATE_ICMP ...
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/*
		 * ... and the reverse policy check passes with the network
		 * header temporarily moved sizeof(*hdr) bytes past skb->data
		 * (presumably onto the packet embedded in the message).
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		/* Put the network header back where it was. */
		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	/* Advance skb->data past the ICMPv6 header. */
	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		/* Replies can be switched off with the echo_ignore_all sysctl. */
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		/* Re-read the header pointer after pskb_may_pull(). */
		hdr = icmp6_hdr(skb);

		/* to notify */
		/* fall through */
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	/* Known types that need no processing here. */
	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

	/* Each label falls through to the next: a checksum error also counts as an input error. */
csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}
935 
936 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
937 		      u8 type,
938 		      const struct in6_addr *saddr,
939 		      const struct in6_addr *daddr,
940 		      int oif)
941 {
942 	memset(fl6, 0, sizeof(*fl6));
943 	fl6->saddr = *saddr;
944 	fl6->daddr = *daddr;
945 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
946 	fl6->fl6_icmp_type	= type;
947 	fl6->fl6_icmp_code	= 0;
948 	fl6->flowi6_oif		= oif;
949 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
950 }
951 
952 static int __net_init icmpv6_sk_init(struct net *net)
953 {
954 	struct sock *sk;
955 	int err, i, j;
956 
957 	net->ipv6.icmp_sk =
958 		kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
959 	if (!net->ipv6.icmp_sk)
960 		return -ENOMEM;
961 
962 	for_each_possible_cpu(i) {
963 		err = inet_ctl_sock_create(&sk, PF_INET6,
964 					   SOCK_RAW, IPPROTO_ICMPV6, net);
965 		if (err < 0) {
966 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
967 			       err);
968 			goto fail;
969 		}
970 
971 		net->ipv6.icmp_sk[i] = sk;
972 
973 		/* Enough space for 2 64K ICMP packets, including
974 		 * sk_buff struct overhead.
975 		 */
976 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
977 	}
978 	return 0;
979 
980  fail:
981 	for (j = 0; j < i; j++)
982 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
983 	kfree(net->ipv6.icmp_sk);
984 	return err;
985 }
986 
987 static void __net_exit icmpv6_sk_exit(struct net *net)
988 {
989 	int i;
990 
991 	for_each_possible_cpu(i) {
992 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
993 	}
994 	kfree(net->ipv6.icmp_sk);
995 }
996 
997 static struct pernet_operations icmpv6_sk_ops = {
998 	.init = icmpv6_sk_init,
999 	.exit = icmpv6_sk_exit,
1000 };
1001 
1002 int __init icmpv6_init(void)
1003 {
1004 	int err;
1005 
1006 	err = register_pernet_subsys(&icmpv6_sk_ops);
1007 	if (err < 0)
1008 		return err;
1009 
1010 	err = -EAGAIN;
1011 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1012 		goto fail;
1013 
1014 	err = inet6_register_icmp_sender(icmp6_send);
1015 	if (err)
1016 		goto sender_reg_err;
1017 	return 0;
1018 
1019 sender_reg_err:
1020 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1021 fail:
1022 	pr_err("Failed to register ICMP6 protocol\n");
1023 	unregister_pernet_subsys(&icmpv6_sk_ops);
1024 	return err;
1025 }
1026 
/*
 * Undo icmpv6_init(): unregister icmp6_send() as the ICMP sender, the
 * per-namespace socket operations and the IPPROTO_ICMPV6 protocol handler.
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1033 
1034 
/*
 * Mapping of ICMPV6_DEST_UNREACH codes to errno values, indexed by code.
 * .fatal is the value icmpv6_err_convert() returns for that code.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH	*/
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH	*/
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE	*/
};
1068 
1069 int icmpv6_err_convert(u8 type, u8 code, int *err)
1070 {
1071 	int fatal = 0;
1072 
1073 	*err = EPROTO;
1074 
1075 	switch (type) {
1076 	case ICMPV6_DEST_UNREACH:
1077 		fatal = 1;
1078 		if (code < ARRAY_SIZE(tab_unreach)) {
1079 			*err  = tab_unreach[code].err;
1080 			fatal = tab_unreach[code].fatal;
1081 		}
1082 		break;
1083 
1084 	case ICMPV6_PKT_TOOBIG:
1085 		*err = EMSGSIZE;
1086 		break;
1087 
1088 	case ICMPV6_PARAMPROB:
1089 		*err = EPROTO;
1090 		fatal = 1;
1091 		break;
1092 
1093 	case ICMPV6_TIME_EXCEED:
1094 		*err = EHOSTUNREACH;
1095 		break;
1096 	}
1097 
1098 	return fatal;
1099 }
1100 EXPORT_SYMBOL(icmpv6_err_convert);
1101 
1102 #ifdef CONFIG_SYSCTL
1103 static struct ctl_table ipv6_icmp_table_template[] = {
1104 	{
1105 		.procname	= "ratelimit",
1106 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1107 		.maxlen		= sizeof(int),
1108 		.mode		= 0644,
1109 		.proc_handler	= proc_dointvec_ms_jiffies,
1110 	},
1111 	{
1112 		.procname	= "echo_ignore_all",
1113 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1114 		.maxlen		= sizeof(int),
1115 		.mode		= 0644,
1116 		.proc_handler = proc_dointvec,
1117 	},
1118 	{ },
1119 };
1120 
1121 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1122 {
1123 	struct ctl_table *table;
1124 
1125 	table = kmemdup(ipv6_icmp_table_template,
1126 			sizeof(ipv6_icmp_table_template),
1127 			GFP_KERNEL);
1128 
1129 	if (table) {
1130 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1131 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1132 	}
1133 	return table;
1134 }
1135 #endif
1136