xref: /openbmc/linux/net/ipv6/icmp.c (revision 6d0bfe22)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to an icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to send parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 
71 #include <asm/uaccess.h>
72 
73 /*
74  *	The ICMP socket(s). This is the most convenient way to flow control
75  *	our ICMP output as well as maintain a clean interface throughout
76  *	all layers. All Socketless IP sends will soon be gone.
77  *
78  *	On SMP we have one ICMP socket per-cpu.
79  */
80 static inline struct sock *icmpv6_sk(struct net *net)
81 {
82 	return net->ipv6.icmp_sk[smp_processor_id()];
83 }
84 
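/*
 * err_handler for IPPROTO_ICMPV6: called for ICMPv6 errors whose embedded
 * packet was itself ICMPv6 (e.g. an Echo Request we sent).  Updates the
 * path MTU on Packet Too Big, follows redirects, and passes errors about
 * our Echo Requests to the ping socket code.
 */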
85 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
86 		       u8 type, u8 code, int offset, __be32 info)
87 {
88 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
89 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
90 	struct net *net = dev_net(skb->dev);
91 
92 	if (type == ICMPV6_PKT_TOOBIG)
93 		ip6_update_pmtu(skb, net, info, 0, 0);
94 	else if (type == NDISC_REDIRECT)
95 		ip6_redirect(skb, net, 0, 0);
96 
97 	if (!(type & ICMPV6_INFOMSG_MASK))
98 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
99 			ping_err(skb, offset, info);
100 }
101 
102 static int icmpv6_rcv(struct sk_buff *skb);
103 
104 static const struct inet6_protocol icmpv6_protocol = {
105 	.handler	=	icmpv6_rcv,
106 	.err_handler	=	icmpv6_err,
107 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
108 };
109 
110 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
111 {
112 	struct sock *sk;
113 
114 	local_bh_disable();
115 
116 	sk = icmpv6_sk(net);
117 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118 		/* This can happen if the output path (e.g. a SIT or
119 		 * ip6ip6 tunnel) signals dst_link_failure() for an
120 		 * outgoing ICMP6 packet.
121 		 */
122 		local_bh_enable();
123 		return NULL;
124 	}
125 	return sk;
126 }
127 
128 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
129 {
130 	spin_unlock_bh(&sk->sk_lock.slock);
131 }
132 
133 /*
134  * Figure out whether we may reply to this packet with an icmp error.
135  *
136  * We do not reply if:
137  *	- it was an icmp error message.
138  *	- it is truncated, so that it is known that the protocol is ICMPV6
139  *	  (i.e. in the middle of some exthdr)
140  *
141  *	--ANK (980726)
142  */
143 
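/*
 * Returns true when we must not reply: the embedded packet is itself an
 * ICMPv6 error message, or it is too short to even hold its IPv6 header.
 */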
144 static bool is_ineligible(const struct sk_buff *skb)
145 {
146 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
147 	int len = skb->len - ptr;
148 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
149 	__be16 frag_off;
150 
151 	if (len < 0)
152 		return true;
153 
154 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
155 	if (ptr < 0)
156 		return false;
157 	if (nexthdr == IPPROTO_ICMPV6) {
158 		u8 _type, *tp;
159 		tp = skb_header_pointer(skb,
160 			ptr+offsetof(struct icmp6hdr, icmp6_type),
161 			sizeof(_type), &_type);
162 		if (tp == NULL ||
163 		    !(*tp & ICMPV6_INFOMSG_MASK))
164 			return true;
165 	}
166 	return false;
167 }
168 
169 /*
170  * Check the ICMP output rate limit
171  */
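/*
 * Informational messages and Packet Too Big are never limited.  Everything
 * else is limited per destination via the inet_peer token bucket
 * (inet_peer_xrlim_allow), using the icmpv6_time sysctl as the minimum
 * interval; destinations covered by a shorter (wider) route prefix get a
 * smaller interval, and loopback destinations are not limited at all.
 */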
172 static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
173 				      struct flowi6 *fl6)
174 {
175 	struct dst_entry *dst;
176 	struct net *net = sock_net(sk);
177 	bool res = false;
178 
179 	/* Informational messages are not limited. */
180 	if (type & ICMPV6_INFOMSG_MASK)
181 		return true;
182 
183 	/* Do not limit pmtu discovery, it would break it. */
184 	if (type == ICMPV6_PKT_TOOBIG)
185 		return true;
186 
187 	/*
188 	 * Look up the output route.
189 	 * XXX: perhaps the expiry for routing entries cloned by
190 	 * this lookup should be more aggressive (not longer than the timeout).
191 	 */
192 	dst = ip6_route_output(net, sk, fl6);
193 	if (dst->error) {
194 		IP6_INC_STATS(net, ip6_dst_idev(dst),
195 			      IPSTATS_MIB_OUTNOROUTES);
196 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
197 		res = true;
198 	} else {
199 		struct rt6_info *rt = (struct rt6_info *)dst;
200 		int tmo = net->ipv6.sysctl.icmpv6_time;
201 		struct inet_peer *peer;
202 
203 		/* Give more bandwidth to wider prefixes. */
204 		if (rt->rt6i_dst.plen < 128)
205 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
206 
207 		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
208 		res = inet_peer_xrlim_allow(peer, tmo);
209 		if (peer)
210 			inet_putpeer(peer);
211 	}
212 	dst_release(dst);
213 	return res;
214 }
215 
216 /*
217  *	An inline helper for the "simple" if statement below; it checks
218  *	whether a parameter problem report was caused by an unrecognized
219  *	IPv6 option whose Option Type has its highest-order two bits
220  *	set to 10
221  */
222 
223 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
224 {
225 	u8 _optval, *op;
226 
227 	offset += skb_network_offset(skb);
228 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
229 	if (op == NULL)
230 		return true;
231 	return (*op & 0xC0) == 0x80;
232 }
233 
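/*
 * Finish an ICMPv6 message built with ip6_append_data(): copy the prepared
 * header into the first queued skb, compute the ICMPv6 checksum over every
 * queued fragment, and hand the queue to ip6_push_pending_frames() for
 * transmission.
 */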
234 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
235 			       struct icmp6hdr *thdr, int len)
236 {
237 	struct sk_buff *skb;
238 	struct icmp6hdr *icmp6h;
239 	int err = 0;
240 
241 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
242 		goto out;
243 
244 	icmp6h = icmp6_hdr(skb);
245 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
246 	icmp6h->icmp6_cksum = 0;
247 
248 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
249 		skb->csum = csum_partial(icmp6h,
250 					sizeof(struct icmp6hdr), skb->csum);
251 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
252 						      &fl6->daddr,
253 						      len, fl6->flowi6_proto,
254 						      skb->csum);
255 	} else {
256 		__wsum tmp_csum = 0;
257 
258 		skb_queue_walk(&sk->sk_write_queue, skb) {
259 			tmp_csum = csum_add(tmp_csum, skb->csum);
260 		}
261 
262 		tmp_csum = csum_partial(icmp6h,
263 					sizeof(struct icmp6hdr), tmp_csum);
264 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
265 						      &fl6->daddr,
266 						      len, fl6->flowi6_proto,
267 						      tmp_csum);
268 	}
269 	ip6_push_pending_frames(sk);
270 out:
271 	return err;
272 }
273 
274 struct icmpv6_msg {
275 	struct sk_buff	*skb;
276 	int		offset;
277 	uint8_t		type;
278 };
279 
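/*
 * getfrag callback for ip6_append_data(): copies len bytes of the original
 * packet (starting at msg->offset + offset) into the message being built,
 * folding them into the checksum as it goes.  For error messages the
 * conntrack entry of the offending skb is attached to the new one.
 */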
280 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
281 {
282 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
283 	struct sk_buff *org_skb = msg->skb;
284 	__wsum csum = 0;
285 
286 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
287 				      to, len, csum);
288 	skb->csum = csum_block_add(skb->csum, csum, odd);
289 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
290 		nf_ct_attach(skb, org_skb);
291 	return 0;
292 }
293 
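/*
 * Mobile IPv6: if the offending packet carried a Home Address destination
 * option, swap its source address with the home address from that option,
 * so the ICMPv6 error is addressed to the mobile node's home address rather
 * than its care-of address.
 */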
294 #if IS_ENABLED(CONFIG_IPV6_MIP6)
295 static void mip6_addr_swap(struct sk_buff *skb)
296 {
297 	struct ipv6hdr *iph = ipv6_hdr(skb);
298 	struct inet6_skb_parm *opt = IP6CB(skb);
299 	struct ipv6_destopt_hao *hao;
300 	struct in6_addr tmp;
301 	int off;
302 
303 	if (opt->dsthao) {
304 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
305 		if (likely(off >= 0)) {
306 			hao = (struct ipv6_destopt_hao *)
307 					(skb_network_header(skb) + off);
308 			tmp = iph->saddr;
309 			iph->saddr = hao->addr;
310 			hao->addr = tmp;
311 		}
312 	}
313 }
314 #else
315 static inline void mip6_addr_swap(struct sk_buff *skb) {}
316 #endif
317 
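/*
 * Route lookup for an outgoing ICMPv6 error.  Refuses anycast destinations,
 * then runs the normal xfrm policy lookup; if that returns -EPERM, the
 * lookup is retried with the flow decoded from the offending packet in the
 * reverse direction (XFRM_LOOKUP_ICMP).
 */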
318 struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
319 				      struct sock *sk, struct flowi6 *fl6)
320 {
321 	struct dst_entry *dst, *dst2;
322 	struct flowi6 fl2;
323 	int err;
324 
325 	err = ip6_dst_lookup(sk, &dst, fl6);
326 	if (err)
327 		return ERR_PTR(err);
328 
329 	/*
330 	 * We won't send an icmp error if the destination is a known
331 	 * anycast address.
332 	 */
333 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
334 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
335 		dst_release(dst);
336 		return ERR_PTR(-EINVAL);
337 	}
338 
339 	/* No need to clone since we're just using its address. */
340 	dst2 = dst;
341 
342 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
343 	if (!IS_ERR(dst)) {
344 		if (dst != dst2)
345 			return dst;
346 	} else {
347 		if (PTR_ERR(dst) == -EPERM)
348 			dst = NULL;
349 		else
350 			return dst;
351 	}
352 
353 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
354 	if (err)
355 		goto relookup_failed;
356 
357 	err = ip6_dst_lookup(sk, &dst2, &fl2);
358 	if (err)
359 		goto relookup_failed;
360 
361 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
362 	if (!IS_ERR(dst2)) {
363 		dst_release(dst);
364 		dst = dst2;
365 	} else {
366 		err = PTR_ERR(dst2);
367 		if (err == -EPERM) {
368 			dst_release(dst);
369 			return dst2;
370 		} else
371 			goto relookup_failed;
372 	}
373 
374 relookup_failed:
375 	if (dst)
376 		return dst;
377 	return ERR_PTR(err);
378 }
379 
380 /*
381  *	Send an ICMP message in response to a packet in error
382  */
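/*
 * The RFC sanity rules are enforced here: no error about a packet sent to a
 * multicast / non-host address (except Packet Too Big and unrecognized-option
 * Parameter Problems), none about packets whose source does not identify a
 * single node, and never an error in response to another ICMPv6 error.  The
 * quoted part of the offending packet is truncated so the whole error fits
 * in IPV6_MIN_MTU bytes.
 */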
383 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
384 {
385 	struct net *net = dev_net(skb->dev);
386 	struct inet6_dev *idev = NULL;
387 	struct ipv6hdr *hdr = ipv6_hdr(skb);
388 	struct sock *sk;
389 	struct ipv6_pinfo *np;
390 	const struct in6_addr *saddr = NULL;
391 	struct dst_entry *dst;
392 	struct icmp6hdr tmp_hdr;
393 	struct flowi6 fl6;
394 	struct icmpv6_msg msg;
395 	int iif = 0;
396 	int addr_type = 0;
397 	int len;
398 	int hlimit;
399 	int err = 0;
400 
401 	if ((u8 *)hdr < skb->head ||
402 	    (skb->network_header + sizeof(*hdr)) > skb->tail)
403 		return;
404 
405 	/*
406 	 *	Make sure we respect the rules
407 	 *	i.e. RFC 1885 2.4(e)
408 	 *	Rule (e.1) is enforced by not using icmp6_send
409 	 *	in any code that processes icmp errors.
410 	 */
411 	addr_type = ipv6_addr_type(&hdr->daddr);
412 
413 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0))
414 		saddr = &hdr->daddr;
415 
416 	/*
417 	 *	Dest addr check
418 	 */
419 
420 	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
421 		if (type != ICMPV6_PKT_TOOBIG &&
422 		    !(type == ICMPV6_PARAMPROB &&
423 		      code == ICMPV6_UNK_OPTION &&
424 		      (opt_unrec(skb, info))))
425 			return;
426 
427 		saddr = NULL;
428 	}
429 
430 	addr_type = ipv6_addr_type(&hdr->saddr);
431 
432 	/*
433 	 *	Source addr check
434 	 */
435 
436 	if (__ipv6_addr_needs_scope_id(addr_type))
437 		iif = skb->dev->ifindex;
438 
439 	/*
440 	 *	Must not send error if the source does not uniquely
441 	 *	identify a single node (RFC2463 Section 2.4).
442 	 *	We check unspecified / multicast addresses here,
443 	 *	and anycast addresses will be checked later.
444 	 */
445 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
446 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
447 		return;
448 	}
449 
450 	/*
451 	 *	Never answer to an ICMP error packet.
452 	 */
453 	if (is_ineligible(skb)) {
454 		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
455 		return;
456 	}
457 
458 	mip6_addr_swap(skb);
459 
460 	memset(&fl6, 0, sizeof(fl6));
461 	fl6.flowi6_proto = IPPROTO_ICMPV6;
462 	fl6.daddr = hdr->saddr;
463 	if (saddr)
464 		fl6.saddr = *saddr;
465 	fl6.flowi6_oif = iif;
466 	fl6.fl6_icmp_type = type;
467 	fl6.fl6_icmp_code = code;
468 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
469 
470 	sk = icmpv6_xmit_lock(net);
471 	if (sk == NULL)
472 		return;
473 	np = inet6_sk(sk);
474 
475 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
476 		goto out;
477 
478 	tmp_hdr.icmp6_type = type;
479 	tmp_hdr.icmp6_code = code;
480 	tmp_hdr.icmp6_cksum = 0;
481 	tmp_hdr.icmp6_pointer = htonl(info);
482 
483 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
484 		fl6.flowi6_oif = np->mcast_oif;
485 	else if (!fl6.flowi6_oif)
486 		fl6.flowi6_oif = np->ucast_oif;
487 
488 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
489 	if (IS_ERR(dst))
490 		goto out;
491 
492 	if (ipv6_addr_is_multicast(&fl6.daddr))
493 		hlimit = np->mcast_hops;
494 	else
495 		hlimit = np->hop_limit;
496 	if (hlimit < 0)
497 		hlimit = ip6_dst_hoplimit(dst);
498 
499 	msg.skb = skb;
500 	msg.offset = skb_network_offset(skb);
501 	msg.type = type;
502 
503 	len = skb->len - msg.offset;
504 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
505 	if (len < 0) {
506 		LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
507 		goto out_dst_release;
508 	}
509 
510 	rcu_read_lock();
511 	idev = __in6_dev_get(skb->dev);
512 
513 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
514 			      len + sizeof(struct icmp6hdr),
515 			      sizeof(struct icmp6hdr), hlimit,
516 			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,
517 			      MSG_DONTWAIT, np->dontfrag);
518 	if (err) {
519 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
520 		ip6_flush_pending_frames(sk);
521 	} else {
522 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
523 						 len + sizeof(struct icmp6hdr));
524 	}
525 	rcu_read_unlock();
526 out_dst_release:
527 	dst_release(dst);
528 out:
529 	icmpv6_xmit_unlock(sk);
530 }
531 
532 /* Slightly more convenient version of icmp6_send.
533  */
534 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
535 {
536 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
537 	kfree_skb(skb);
538 }
539 
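/*
 * Reply to an Echo Request: the received ICMPv6 header (identifier and
 * sequence number) and payload are echoed back with the type changed to
 * ECHO_REPLY, and the reply is normally sourced from the address the
 * request was sent to.
 */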
540 static void icmpv6_echo_reply(struct sk_buff *skb)
541 {
542 	struct net *net = dev_net(skb->dev);
543 	struct sock *sk;
544 	struct inet6_dev *idev;
545 	struct ipv6_pinfo *np;
546 	const struct in6_addr *saddr = NULL;
547 	struct icmp6hdr *icmph = icmp6_hdr(skb);
548 	struct icmp6hdr tmp_hdr;
549 	struct flowi6 fl6;
550 	struct icmpv6_msg msg;
551 	struct dst_entry *dst;
552 	int err = 0;
553 	int hlimit;
554 
555 	saddr = &ipv6_hdr(skb)->daddr;
556 
557 	if (!ipv6_unicast_destination(skb))
558 		saddr = NULL;
559 
560 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
561 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
562 
563 	memset(&fl6, 0, sizeof(fl6));
564 	fl6.flowi6_proto = IPPROTO_ICMPV6;
565 	fl6.daddr = ipv6_hdr(skb)->saddr;
566 	if (saddr)
567 		fl6.saddr = *saddr;
568 	fl6.flowi6_oif = skb->dev->ifindex;
569 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
570 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
571 
572 	sk = icmpv6_xmit_lock(net);
573 	if (sk == NULL)
574 		return;
575 	np = inet6_sk(sk);
576 
577 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
578 		fl6.flowi6_oif = np->mcast_oif;
579 	else if (!fl6.flowi6_oif)
580 		fl6.flowi6_oif = np->ucast_oif;
581 
582 	err = ip6_dst_lookup(sk, &dst, &fl6);
583 	if (err)
584 		goto out;
585 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
586 	if (IS_ERR(dst))
587 		goto out;
588 
589 	if (ipv6_addr_is_multicast(&fl6.daddr))
590 		hlimit = np->mcast_hops;
591 	else
592 		hlimit = np->hop_limit;
593 	if (hlimit < 0)
594 		hlimit = ip6_dst_hoplimit(dst);
595 
596 	idev = __in6_dev_get(skb->dev);
597 
598 	msg.skb = skb;
599 	msg.offset = 0;
600 	msg.type = ICMPV6_ECHO_REPLY;
601 
602 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
603 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
604 				(struct rt6_info *)dst, MSG_DONTWAIT,
605 				np->dontfrag);
606 
607 	if (err) {
608 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
609 		ip6_flush_pending_frames(sk);
610 	} else {
611 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
612 						 skb->len + sizeof(struct icmp6hdr));
613 	}
614 	dst_release(dst);
615 out:
616 	icmpv6_xmit_unlock(sk);
617 }
618 
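/*
 * Deliver a received ICMPv6 error to the protocol the offending packet was
 * using: skip any extension headers to find the inner transport protocol,
 * make sure at least 8 bytes of its header are present, then call that
 * protocol's err_handler and notify any matching raw sockets.
 */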
619 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
620 {
621 	const struct inet6_protocol *ipprot;
622 	int inner_offset;
623 	__be16 frag_off;
624 	u8 nexthdr;
625 
626 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
627 		return;
628 
629 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
630 	if (ipv6_ext_hdr(nexthdr)) {
631 		/* now skip over extension headers */
632 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
633 						&nexthdr, &frag_off);
634 		if (inner_offset < 0)
635 			return;
636 	} else {
637 		inner_offset = sizeof(struct ipv6hdr);
638 	}
639 
640 	/* Check the header, including 8 bytes of the inner protocol header. */
641 	if (!pskb_may_pull(skb, inner_offset+8))
642 		return;
643 
644 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
645 	   Without this we will not be able, e.g., to do source routed
646 	   pmtu discovery.
647 	   Corresponding argument (opt) to notifiers is already added.
648 	   --ANK (980726)
649 	 */
650 
651 	rcu_read_lock();
652 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
653 	if (ipprot && ipprot->err_handler)
654 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
655 	rcu_read_unlock();
656 
657 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
658 }
659 
660 /*
661  *	Handle icmp messages
662  */
663 
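/*
 * Receive handler registered for IPPROTO_ICMPV6: check xfrm policy, verify
 * the ICMPv6 checksum, bump the MIB counters, then dispatch on the message
 * type (echo, errors, neighbour discovery, multicast listener messages).
 */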
664 static int icmpv6_rcv(struct sk_buff *skb)
665 {
666 	struct net_device *dev = skb->dev;
667 	struct inet6_dev *idev = __in6_dev_get(dev);
668 	const struct in6_addr *saddr, *daddr;
669 	struct icmp6hdr *hdr;
670 	u8 type;
671 
672 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
673 		struct sec_path *sp = skb_sec_path(skb);
674 		int nh;
675 
676 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
677 				 XFRM_STATE_ICMP))
678 			goto drop_no_count;
679 
680 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
681 			goto drop_no_count;
682 
683 		nh = skb_network_offset(skb);
684 		skb_set_network_header(skb, sizeof(*hdr));
685 
686 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
687 			goto drop_no_count;
688 
689 		skb_set_network_header(skb, nh);
690 	}
691 
692 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
693 
694 	saddr = &ipv6_hdr(skb)->saddr;
695 	daddr = &ipv6_hdr(skb)->daddr;
696 
697 	/* Perform checksum. */
698 	switch (skb->ip_summed) {
699 	case CHECKSUM_COMPLETE:
700 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
701 				     skb->csum))
702 			break;
703 		/* fall through */
704 	case CHECKSUM_NONE:
705 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
706 					     IPPROTO_ICMPV6, 0));
707 		if (__skb_checksum_complete(skb)) {
708 			LIMIT_NETDEBUG(KERN_DEBUG
709 				       "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
710 				       saddr, daddr);
711 			goto csum_error;
712 		}
713 	}
714 
715 	if (!pskb_pull(skb, sizeof(*hdr)))
716 		goto discard_it;
717 
718 	hdr = icmp6_hdr(skb);
719 
720 	type = hdr->icmp6_type;
721 
722 	ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
723 
724 	switch (type) {
725 	case ICMPV6_ECHO_REQUEST:
726 		icmpv6_echo_reply(skb);
727 		break;
728 
729 	case ICMPV6_ECHO_REPLY:
730 		ping_rcv(skb);
731 		break;
732 
733 	case ICMPV6_PKT_TOOBIG:
734 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
735 		   standard destination cache. It seems only the "advanced"
736 		   destination cache will allow us to solve this problem
737 		   --ANK (980726)
738 		 */
739 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
740 			goto discard_it;
741 		hdr = icmp6_hdr(skb);
742 
743 		/*
744 		 *	Drop through to notify
745 		 */
746 
747 	case ICMPV6_DEST_UNREACH:
748 	case ICMPV6_TIME_EXCEED:
749 	case ICMPV6_PARAMPROB:
750 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
751 		break;
752 
753 	case NDISC_ROUTER_SOLICITATION:
754 	case NDISC_ROUTER_ADVERTISEMENT:
755 	case NDISC_NEIGHBOUR_SOLICITATION:
756 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
757 	case NDISC_REDIRECT:
758 		ndisc_rcv(skb);
759 		break;
760 
761 	case ICMPV6_MGM_QUERY:
762 		igmp6_event_query(skb);
763 		break;
764 
765 	case ICMPV6_MGM_REPORT:
766 		igmp6_event_report(skb);
767 		break;
768 
769 	case ICMPV6_MGM_REDUCTION:
770 	case ICMPV6_NI_QUERY:
771 	case ICMPV6_NI_REPLY:
772 	case ICMPV6_MLD2_REPORT:
773 	case ICMPV6_DHAAD_REQUEST:
774 	case ICMPV6_DHAAD_REPLY:
775 	case ICMPV6_MOBILE_PREFIX_SOL:
776 	case ICMPV6_MOBILE_PREFIX_ADV:
777 		break;
778 
779 	default:
780 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
781 
782 		/* informational */
783 		if (type & ICMPV6_INFOMSG_MASK)
784 			break;
785 
786 		/*
787 		 * error of unknown type.
788 		 * must pass to upper level
789 		 */
790 
791 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
792 	}
793 
794 	kfree_skb(skb);
795 	return 0;
796 
797 csum_error:
798 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
799 discard_it:
800 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
801 drop_no_count:
802 	kfree_skb(skb);
803 	return 0;
804 }
805 
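/*
 * Fill in a flowi6 for an ICMPv6 message of the given type between saddr
 * and daddr on interface oif; a helper for other ICMPv6 senders in the
 * stack (e.g. neighbour discovery).
 */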
806 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
807 		      u8 type,
808 		      const struct in6_addr *saddr,
809 		      const struct in6_addr *daddr,
810 		      int oif)
811 {
812 	memset(fl6, 0, sizeof(*fl6));
813 	fl6->saddr = *saddr;
814 	fl6->daddr = *daddr;
815 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
816 	fl6->fl6_icmp_type	= type;
817 	fl6->fl6_icmp_code	= 0;
818 	fl6->flowi6_oif		= oif;
819 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
820 }
821 
822 /*
823  * Special lock-class for __icmpv6_sk:
824  */
825 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
826 
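/*
 * Per-namespace setup: create one kernel ICMPv6 control socket per possible
 * CPU (the sockets returned by icmpv6_sk() above) and give each enough send
 * buffer for two maximum-size ICMPv6 packets.
 */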
827 static int __net_init icmpv6_sk_init(struct net *net)
828 {
829 	struct sock *sk;
830 	int err, i, j;
831 
832 	net->ipv6.icmp_sk =
833 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
834 	if (net->ipv6.icmp_sk == NULL)
835 		return -ENOMEM;
836 
837 	for_each_possible_cpu(i) {
838 		err = inet_ctl_sock_create(&sk, PF_INET6,
839 					   SOCK_RAW, IPPROTO_ICMPV6, net);
840 		if (err < 0) {
841 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
842 			       err);
843 			goto fail;
844 		}
845 
846 		net->ipv6.icmp_sk[i] = sk;
847 
848 		/*
849 		 * Split off their lock-class, because sk->sk_dst_lock
850 		 * gets used from softirqs, which is safe for
851 		 * __icmpv6_sk (because those never get directly used
852 		 * via userspace syscalls), but unsafe for normal sockets.
853 		 */
854 		lockdep_set_class(&sk->sk_dst_lock,
855 				  &icmpv6_socket_sk_dst_lock_key);
856 
857 		/* Enough space for 2 64K ICMP packets, including
858 		 * sk_buff struct overhead.
859 		 */
860 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
861 	}
862 	return 0;
863 
864  fail:
865 	for (j = 0; j < i; j++)
866 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
867 	kfree(net->ipv6.icmp_sk);
868 	return err;
869 }
870 
871 static void __net_exit icmpv6_sk_exit(struct net *net)
872 {
873 	int i;
874 
875 	for_each_possible_cpu(i) {
876 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
877 	}
878 	kfree(net->ipv6.icmp_sk);
879 }
880 
881 static struct pernet_operations icmpv6_sk_ops = {
882        .init = icmpv6_sk_init,
883        .exit = icmpv6_sk_exit,
884 };
885 
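/*
 * Register the per-namespace control sockets, hook this file's receive and
 * error handlers into the inet6 protocol table for IPPROTO_ICMPV6, and
 * register icmp6_send() as the ICMPv6 error sender used by the rest of the
 * stack (via inet6_register_icmp_sender).
 */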
886 int __init icmpv6_init(void)
887 {
888 	int err;
889 
890 	err = register_pernet_subsys(&icmpv6_sk_ops);
891 	if (err < 0)
892 		return err;
893 
894 	err = -EAGAIN;
895 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
896 		goto fail;
897 
898 	err = inet6_register_icmp_sender(icmp6_send);
899 	if (err)
900 		goto sender_reg_err;
901 	return 0;
902 
903 sender_reg_err:
904 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
905 fail:
906 	pr_err("Failed to register ICMP6 protocol\n");
907 	unregister_pernet_subsys(&icmpv6_sk_ops);
908 	return err;
909 }
910 
911 void icmpv6_cleanup(void)
912 {
913 	inet6_unregister_icmp_sender(icmp6_send);
914 	unregister_pernet_subsys(&icmpv6_sk_ops);
915 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
916 }
917 
918 
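/*
 * Map the ICMPV6_DEST_UNREACH codes 0..4 (no route, administratively
 * prohibited, reserved, address unreachable, port unreachable) to errno
 * values and a "fatal" flag, used by icmpv6_err_convert() below.
 */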
919 static const struct icmp6_err {
920 	int err;
921 	int fatal;
922 } tab_unreach[] = {
923 	{	/* NOROUTE */
924 		.err	= ENETUNREACH,
925 		.fatal	= 0,
926 	},
927 	{	/* ADM_PROHIBITED */
928 		.err	= EACCES,
929 		.fatal	= 1,
930 	},
931 	{	/* Was NOT_NEIGHBOUR, now reserved */
932 		.err	= EHOSTUNREACH,
933 		.fatal	= 0,
934 	},
935 	{	/* ADDR_UNREACH	*/
936 		.err	= EHOSTUNREACH,
937 		.fatal	= 0,
938 	},
939 	{	/* PORT_UNREACH	*/
940 		.err	= ECONNREFUSED,
941 		.fatal	= 1,
942 	},
943 };
944 
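/*
 * Translate an ICMPv6 type/code pair into an errno for the transport layer.
 * Returns non-zero when the error is fatal for the socket (e.g. port
 * unreachable or a parameter problem).
 */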
945 int icmpv6_err_convert(u8 type, u8 code, int *err)
946 {
947 	int fatal = 0;
948 
949 	*err = EPROTO;
950 
951 	switch (type) {
952 	case ICMPV6_DEST_UNREACH:
953 		fatal = 1;
954 		if (code <= ICMPV6_PORT_UNREACH) {
955 			*err  = tab_unreach[code].err;
956 			fatal = tab_unreach[code].fatal;
957 		}
958 		break;
959 
960 	case ICMPV6_PKT_TOOBIG:
961 		*err = EMSGSIZE;
962 		break;
963 
964 	case ICMPV6_PARAMPROB:
965 		*err = EPROTO;
966 		fatal = 1;
967 		break;
968 
969 	case ICMPV6_TIME_EXCEED:
970 		*err = EHOSTUNREACH;
971 		break;
972 	}
973 
974 	return fatal;
975 }
976 EXPORT_SYMBOL(icmpv6_err_convert);
977 
978 #ifdef CONFIG_SYSCTL
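/*
 * net.ipv6.icmp.ratelimit: minimum interval between ICMPv6 error messages
 * to the same destination, exposed in milliseconds and stored in jiffies;
 * this is the icmpv6_time value consumed by icmpv6_xrlim_allow() above.
 */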
979 ctl_table ipv6_icmp_table_template[] = {
980 	{
981 		.procname	= "ratelimit",
982 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
983 		.maxlen		= sizeof(int),
984 		.mode		= 0644,
985 		.proc_handler	= proc_dointvec_ms_jiffies,
986 	},
987 	{ },
988 };
989 
990 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
991 {
992 	struct ctl_table *table;
993 
994 	table = kmemdup(ipv6_icmp_table_template,
995 			sizeof(ipv6_icmp_table_template),
996 			GFP_KERNEL);
997 
998 	if (table)
999 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1000 
1001 	return table;
1002 }
1003 #endif
1004 
1005