xref: /openbmc/linux/net/ipv6/icmp.c (revision 7211ec63)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 }
104 
105 static int icmpv6_rcv(struct sk_buff *skb);
106 
107 static const struct inet6_protocol icmpv6_protocol = {
108 	.handler	=	icmpv6_rcv,
109 	.err_handler	=	icmpv6_err,
110 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
111 };
112 
113 /* Called with BH disabled */
114 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
115 {
116 	struct sock *sk;
117 
118 	sk = icmpv6_sk(net);
119 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 		/* This can happen if the output path (f.e. SIT or
121 		 * ip6ip6 tunnel) signals dst_link_failure() for an
122 		 * outgoing ICMP6 packet.
123 		 */
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
129 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
130 {
131 	spin_unlock(&sk->sk_lock.slock);
132 }
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 	int len = skb->len - ptr;
149 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 	__be16 frag_off;
151 
152 	if (len < 0)
153 		return true;
154 
155 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 	if (ptr < 0)
157 		return false;
158 	if (nexthdr == IPPROTO_ICMPV6) {
159 		u8 _type, *tp;
160 		tp = skb_header_pointer(skb,
161 			ptr+offsetof(struct icmp6hdr, icmp6_type),
162 			sizeof(_type), &_type);
163 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164 			return true;
165 	}
166 	return false;
167 }
168 
169 static bool icmpv6_mask_allow(int type)
170 {
171 	/* Informational messages are not limited. */
172 	if (type & ICMPV6_INFOMSG_MASK)
173 		return true;
174 
175 	/* Do not limit pmtu discovery, it would break it. */
176 	if (type == ICMPV6_PKT_TOOBIG)
177 		return true;
178 
179 	return false;
180 }
181 
182 static bool icmpv6_global_allow(int type)
183 {
184 	if (icmpv6_mask_allow(type))
185 		return true;
186 
187 	if (icmp_global_allow())
188 		return true;
189 
190 	return false;
191 }
192 
193 /*
194  * Check the ICMP output rate limit
195  */
196 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
197 			       struct flowi6 *fl6)
198 {
199 	struct net *net = sock_net(sk);
200 	struct dst_entry *dst;
201 	bool res = false;
202 
203 	if (icmpv6_mask_allow(type))
204 		return true;
205 
206 	/*
207 	 * Look up the output route.
208 	 * XXX: perhaps the expire for routing entries cloned by
209 	 * this lookup should be more aggressive (not longer than timeout).
210 	 */
211 	dst = ip6_route_output(net, sk, fl6);
212 	if (dst->error) {
213 		IP6_INC_STATS(net, ip6_dst_idev(dst),
214 			      IPSTATS_MIB_OUTNOROUTES);
215 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
216 		res = true;
217 	} else {
218 		struct rt6_info *rt = (struct rt6_info *)dst;
219 		int tmo = net->ipv6.sysctl.icmpv6_time;
220 		struct inet_peer *peer;
221 
222 		/* Give more bandwidth to wider prefixes. */
223 		if (rt->rt6i_dst.plen < 128)
224 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
225 
226 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
227 		res = inet_peer_xrlim_allow(peer, tmo);
228 		if (peer)
229 			inet_putpeer(peer);
230 	}
231 	dst_release(dst);
232 	return res;
233 }
234 
235 /*
236  *	an inline helper for the "simple" if statement below
237  *	checks if parameter problem report is caused by an
238  *	unrecognized IPv6 option that has the Option Type
239  *	highest-order two bits set to 10
240  */
241 
242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
243 {
244 	u8 _optval, *op;
245 
246 	offset += skb_network_offset(skb);
247 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
248 	if (!op)
249 		return true;
250 	return (*op & 0xC0) == 0x80;
251 }
252 
253 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
254 			       struct icmp6hdr *thdr, int len)
255 {
256 	struct sk_buff *skb;
257 	struct icmp6hdr *icmp6h;
258 	int err = 0;
259 
260 	skb = skb_peek(&sk->sk_write_queue);
261 	if (!skb)
262 		goto out;
263 
264 	icmp6h = icmp6_hdr(skb);
265 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
266 	icmp6h->icmp6_cksum = 0;
267 
268 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
269 		skb->csum = csum_partial(icmp6h,
270 					sizeof(struct icmp6hdr), skb->csum);
271 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
272 						      &fl6->daddr,
273 						      len, fl6->flowi6_proto,
274 						      skb->csum);
275 	} else {
276 		__wsum tmp_csum = 0;
277 
278 		skb_queue_walk(&sk->sk_write_queue, skb) {
279 			tmp_csum = csum_add(tmp_csum, skb->csum);
280 		}
281 
282 		tmp_csum = csum_partial(icmp6h,
283 					sizeof(struct icmp6hdr), tmp_csum);
284 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
285 						      &fl6->daddr,
286 						      len, fl6->flowi6_proto,
287 						      tmp_csum);
288 	}
289 	ip6_push_pending_frames(sk);
290 out:
291 	return err;
292 }
293 
294 struct icmpv6_msg {
295 	struct sk_buff	*skb;
296 	int		offset;
297 	uint8_t		type;
298 };
299 
300 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
301 {
302 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
303 	struct sk_buff *org_skb = msg->skb;
304 	__wsum csum = 0;
305 
306 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
307 				      to, len, csum);
308 	skb->csum = csum_block_add(skb->csum, csum, odd);
309 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
310 		nf_ct_attach(skb, org_skb);
311 	return 0;
312 }
313 
314 #if IS_ENABLED(CONFIG_IPV6_MIP6)
315 static void mip6_addr_swap(struct sk_buff *skb)
316 {
317 	struct ipv6hdr *iph = ipv6_hdr(skb);
318 	struct inet6_skb_parm *opt = IP6CB(skb);
319 	struct ipv6_destopt_hao *hao;
320 	struct in6_addr tmp;
321 	int off;
322 
323 	if (opt->dsthao) {
324 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
325 		if (likely(off >= 0)) {
326 			hao = (struct ipv6_destopt_hao *)
327 					(skb_network_header(skb) + off);
328 			tmp = iph->saddr;
329 			iph->saddr = hao->addr;
330 			hao->addr = tmp;
331 		}
332 	}
333 }
334 #else
335 static inline void mip6_addr_swap(struct sk_buff *skb) {}
336 #endif
337 
338 static struct dst_entry *icmpv6_route_lookup(struct net *net,
339 					     struct sk_buff *skb,
340 					     struct sock *sk,
341 					     struct flowi6 *fl6)
342 {
343 	struct dst_entry *dst, *dst2;
344 	struct flowi6 fl2;
345 	int err;
346 
347 	err = ip6_dst_lookup(net, sk, &dst, fl6);
348 	if (err)
349 		return ERR_PTR(err);
350 
351 	/*
352 	 * We won't send icmp if the destination is known
353 	 * anycast.
354 	 */
355 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
356 		net_dbg_ratelimited("icmp6_send: acast source\n");
357 		dst_release(dst);
358 		return ERR_PTR(-EINVAL);
359 	}
360 
361 	/* No need to clone since we're just using its address. */
362 	dst2 = dst;
363 
364 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
365 	if (!IS_ERR(dst)) {
366 		if (dst != dst2)
367 			return dst;
368 	} else {
369 		if (PTR_ERR(dst) == -EPERM)
370 			dst = NULL;
371 		else
372 			return dst;
373 	}
374 
375 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
376 	if (err)
377 		goto relookup_failed;
378 
379 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
380 	if (err)
381 		goto relookup_failed;
382 
383 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
384 	if (!IS_ERR(dst2)) {
385 		dst_release(dst);
386 		dst = dst2;
387 	} else {
388 		err = PTR_ERR(dst2);
389 		if (err == -EPERM) {
390 			dst_release(dst);
391 			return dst2;
392 		} else
393 			goto relookup_failed;
394 	}
395 
396 relookup_failed:
397 	if (dst)
398 		return dst;
399 	return ERR_PTR(err);
400 }
401 
402 static int icmp6_iif(const struct sk_buff *skb)
403 {
404 	int iif = skb->dev->ifindex;
405 
406 	/* for local traffic to local address, skb dev is the loopback
407 	 * device. Check if there is a dst attached to the skb and if so
408 	 * get the real device index.
409 	 */
410 	if (unlikely(iif == LOOPBACK_IFINDEX)) {
411 		const struct rt6_info *rt6 = skb_rt6_info(skb);
412 
413 		if (rt6)
414 			iif = rt6->rt6i_idev->dev->ifindex;
415 	}
416 
417 	return iif;
418 }
419 
420 /*
421  *	Send an ICMP message in response to a packet in error
422  */
423 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
424 		       const struct in6_addr *force_saddr)
425 {
426 	struct net *net = dev_net(skb->dev);
427 	struct inet6_dev *idev = NULL;
428 	struct ipv6hdr *hdr = ipv6_hdr(skb);
429 	struct sock *sk;
430 	struct ipv6_pinfo *np;
431 	const struct in6_addr *saddr = NULL;
432 	struct dst_entry *dst;
433 	struct icmp6hdr tmp_hdr;
434 	struct flowi6 fl6;
435 	struct icmpv6_msg msg;
436 	struct sockcm_cookie sockc_unused = {0};
437 	struct ipcm6_cookie ipc6;
438 	int iif = 0;
439 	int addr_type = 0;
440 	int len;
441 	int err = 0;
442 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
443 
444 	if ((u8 *)hdr < skb->head ||
445 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
446 		return;
447 
448 	/*
449 	 *	Make sure we respect the rules
450 	 *	i.e. RFC 1885 2.4(e)
451 	 *	Rule (e.1) is enforced by not using icmp6_send
452 	 *	in any code that processes icmp errors.
453 	 */
454 	addr_type = ipv6_addr_type(&hdr->daddr);
455 
456 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
457 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
458 		saddr = &hdr->daddr;
459 
460 	/*
461 	 *	Dest addr check
462 	 */
463 
464 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
465 		if (type != ICMPV6_PKT_TOOBIG &&
466 		    !(type == ICMPV6_PARAMPROB &&
467 		      code == ICMPV6_UNK_OPTION &&
468 		      (opt_unrec(skb, info))))
469 			return;
470 
471 		saddr = NULL;
472 	}
473 
474 	addr_type = ipv6_addr_type(&hdr->saddr);
475 
476 	/*
477 	 *	Source addr check
478 	 */
479 
480 	if (__ipv6_addr_needs_scope_id(addr_type)) {
481 		iif = icmp6_iif(skb);
482 	} else {
483 		dst = skb_dst(skb);
484 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
485 	}
486 
487 	/*
488 	 *	Must not send error if the source does not uniquely
489 	 *	identify a single node (RFC2463 Section 2.4).
490 	 *	We check unspecified / multicast addresses here,
491 	 *	and anycast addresses will be checked later.
492 	 */
493 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
494 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
495 				    &hdr->saddr, &hdr->daddr);
496 		return;
497 	}
498 
499 	/*
500 	 *	Never answer to a ICMP packet.
501 	 */
502 	if (is_ineligible(skb)) {
503 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
504 				    &hdr->saddr, &hdr->daddr);
505 		return;
506 	}
507 
508 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
509 	local_bh_disable();
510 
511 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
512 	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
513 		goto out_bh_enable;
514 
515 	mip6_addr_swap(skb);
516 
517 	memset(&fl6, 0, sizeof(fl6));
518 	fl6.flowi6_proto = IPPROTO_ICMPV6;
519 	fl6.daddr = hdr->saddr;
520 	if (force_saddr)
521 		saddr = force_saddr;
522 	if (saddr)
523 		fl6.saddr = *saddr;
524 	fl6.flowi6_mark = mark;
525 	fl6.flowi6_oif = iif;
526 	fl6.fl6_icmp_type = type;
527 	fl6.fl6_icmp_code = code;
528 	fl6.flowi6_uid = sock_net_uid(net, NULL);
529 	fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
530 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
531 
532 	sk = icmpv6_xmit_lock(net);
533 	if (!sk)
534 		goto out_bh_enable;
535 
536 	sk->sk_mark = mark;
537 	np = inet6_sk(sk);
538 
539 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
540 		goto out;
541 
542 	tmp_hdr.icmp6_type = type;
543 	tmp_hdr.icmp6_code = code;
544 	tmp_hdr.icmp6_cksum = 0;
545 	tmp_hdr.icmp6_pointer = htonl(info);
546 
547 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
548 		fl6.flowi6_oif = np->mcast_oif;
549 	else if (!fl6.flowi6_oif)
550 		fl6.flowi6_oif = np->ucast_oif;
551 
552 	ipc6.tclass = np->tclass;
553 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
554 
555 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
556 	if (IS_ERR(dst))
557 		goto out;
558 
559 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
560 	ipc6.dontfrag = np->dontfrag;
561 	ipc6.opt = NULL;
562 
563 	msg.skb = skb;
564 	msg.offset = skb_network_offset(skb);
565 	msg.type = type;
566 
567 	len = skb->len - msg.offset;
568 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
569 	if (len < 0) {
570 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
571 				    &hdr->saddr, &hdr->daddr);
572 		goto out_dst_release;
573 	}
574 
575 	rcu_read_lock();
576 	idev = __in6_dev_get(skb->dev);
577 
578 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
579 			      len + sizeof(struct icmp6hdr),
580 			      sizeof(struct icmp6hdr),
581 			      &ipc6, &fl6, (struct rt6_info *)dst,
582 			      MSG_DONTWAIT, &sockc_unused);
583 	if (err) {
584 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
585 		ip6_flush_pending_frames(sk);
586 	} else {
587 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
588 						 len + sizeof(struct icmp6hdr));
589 	}
590 	rcu_read_unlock();
591 out_dst_release:
592 	dst_release(dst);
593 out:
594 	icmpv6_xmit_unlock(sk);
595 out_bh_enable:
596 	local_bh_enable();
597 }
598 
599 /* Slightly more convenient version of icmp6_send.
600  */
601 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
602 {
603 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
604 	kfree_skb(skb);
605 }
606 
607 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
608  * if sufficient data bytes are available
609  * @nhs is the size of the tunnel header(s) :
610  *  Either an IPv4 header for SIT encap
611  *         an IPv4 header + GRE header for GRE encap
612  */
613 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
614 			       unsigned int data_len)
615 {
616 	struct in6_addr temp_saddr;
617 	struct rt6_info *rt;
618 	struct sk_buff *skb2;
619 	u32 info = 0;
620 
621 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
622 		return 1;
623 
624 	/* RFC 4884 (partial) support for ICMP extensions */
625 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
626 		data_len = 0;
627 
628 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
629 
630 	if (!skb2)
631 		return 1;
632 
633 	skb_dst_drop(skb2);
634 	skb_pull(skb2, nhs);
635 	skb_reset_network_header(skb2);
636 
637 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
638 
639 	if (rt && rt->dst.dev)
640 		skb2->dev = rt->dst.dev;
641 
642 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
643 
644 	if (data_len) {
645 		/* RFC 4884 (partial) support :
646 		 * insert 0 padding at the end, before the extensions
647 		 */
648 		__skb_push(skb2, nhs);
649 		skb_reset_network_header(skb2);
650 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
651 		memset(skb2->data + data_len - nhs, 0, nhs);
652 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
653 		 * and stored in reserved[0]
654 		 */
655 		info = (data_len/8) << 24;
656 	}
657 	if (type == ICMP_TIME_EXCEEDED)
658 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
659 			   info, &temp_saddr);
660 	else
661 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
662 			   info, &temp_saddr);
663 	if (rt)
664 		ip6_rt_put(rt);
665 
666 	kfree_skb(skb2);
667 
668 	return 0;
669 }
670 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
671 
672 static void icmpv6_echo_reply(struct sk_buff *skb)
673 {
674 	struct net *net = dev_net(skb->dev);
675 	struct sock *sk;
676 	struct inet6_dev *idev;
677 	struct ipv6_pinfo *np;
678 	const struct in6_addr *saddr = NULL;
679 	struct icmp6hdr *icmph = icmp6_hdr(skb);
680 	struct icmp6hdr tmp_hdr;
681 	struct flowi6 fl6;
682 	struct icmpv6_msg msg;
683 	struct dst_entry *dst;
684 	struct ipcm6_cookie ipc6;
685 	int err = 0;
686 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
687 	struct sockcm_cookie sockc_unused = {0};
688 
689 	saddr = &ipv6_hdr(skb)->daddr;
690 
691 	if (!ipv6_unicast_destination(skb) &&
692 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
693 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
694 		saddr = NULL;
695 
696 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
697 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
698 
699 	memset(&fl6, 0, sizeof(fl6));
700 	fl6.flowi6_proto = IPPROTO_ICMPV6;
701 	fl6.daddr = ipv6_hdr(skb)->saddr;
702 	if (saddr)
703 		fl6.saddr = *saddr;
704 	fl6.flowi6_oif = icmp6_iif(skb);
705 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
706 	fl6.flowi6_mark = mark;
707 	fl6.flowi6_uid = sock_net_uid(net, NULL);
708 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
709 
710 	local_bh_disable();
711 	sk = icmpv6_xmit_lock(net);
712 	if (!sk)
713 		goto out_bh_enable;
714 	sk->sk_mark = mark;
715 	np = inet6_sk(sk);
716 
717 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
718 		fl6.flowi6_oif = np->mcast_oif;
719 	else if (!fl6.flowi6_oif)
720 		fl6.flowi6_oif = np->ucast_oif;
721 
722 	err = ip6_dst_lookup(net, sk, &dst, &fl6);
723 	if (err)
724 		goto out;
725 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
726 	if (IS_ERR(dst))
727 		goto out;
728 
729 	idev = __in6_dev_get(skb->dev);
730 
731 	msg.skb = skb;
732 	msg.offset = 0;
733 	msg.type = ICMPV6_ECHO_REPLY;
734 
735 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
736 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
737 	ipc6.dontfrag = np->dontfrag;
738 	ipc6.opt = NULL;
739 
740 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
741 				sizeof(struct icmp6hdr), &ipc6, &fl6,
742 				(struct rt6_info *)dst, MSG_DONTWAIT,
743 				&sockc_unused);
744 
745 	if (err) {
746 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
747 		ip6_flush_pending_frames(sk);
748 	} else {
749 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
750 						 skb->len + sizeof(struct icmp6hdr));
751 	}
752 	dst_release(dst);
753 out:
754 	icmpv6_xmit_unlock(sk);
755 out_bh_enable:
756 	local_bh_enable();
757 }
758 
759 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
760 {
761 	const struct inet6_protocol *ipprot;
762 	int inner_offset;
763 	__be16 frag_off;
764 	u8 nexthdr;
765 	struct net *net = dev_net(skb->dev);
766 
767 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
768 		goto out;
769 
770 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
771 	if (ipv6_ext_hdr(nexthdr)) {
772 		/* now skip over extension headers */
773 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
774 						&nexthdr, &frag_off);
775 		if (inner_offset < 0)
776 			goto out;
777 	} else {
778 		inner_offset = sizeof(struct ipv6hdr);
779 	}
780 
781 	/* Checkin header including 8 bytes of inner protocol header. */
782 	if (!pskb_may_pull(skb, inner_offset+8))
783 		goto out;
784 
785 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
786 	   Without this we will not able f.e. to make source routed
787 	   pmtu discovery.
788 	   Corresponding argument (opt) to notifiers is already added.
789 	   --ANK (980726)
790 	 */
791 
792 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
793 	if (ipprot && ipprot->err_handler)
794 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
795 
796 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
797 	return;
798 
799 out:
800 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
801 }
802 
803 /*
804  *	Handle icmp messages
805  */
806 
807 static int icmpv6_rcv(struct sk_buff *skb)
808 {
809 	struct net_device *dev = skb->dev;
810 	struct inet6_dev *idev = __in6_dev_get(dev);
811 	const struct in6_addr *saddr, *daddr;
812 	struct icmp6hdr *hdr;
813 	u8 type;
814 	bool success = false;
815 
816 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
817 		struct sec_path *sp = skb_sec_path(skb);
818 		int nh;
819 
820 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
821 				 XFRM_STATE_ICMP))
822 			goto drop_no_count;
823 
824 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
825 			goto drop_no_count;
826 
827 		nh = skb_network_offset(skb);
828 		skb_set_network_header(skb, sizeof(*hdr));
829 
830 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
831 			goto drop_no_count;
832 
833 		skb_set_network_header(skb, nh);
834 	}
835 
836 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
837 
838 	saddr = &ipv6_hdr(skb)->saddr;
839 	daddr = &ipv6_hdr(skb)->daddr;
840 
841 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
842 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
843 				    saddr, daddr);
844 		goto csum_error;
845 	}
846 
847 	if (!pskb_pull(skb, sizeof(*hdr)))
848 		goto discard_it;
849 
850 	hdr = icmp6_hdr(skb);
851 
852 	type = hdr->icmp6_type;
853 
854 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
855 
856 	switch (type) {
857 	case ICMPV6_ECHO_REQUEST:
858 		icmpv6_echo_reply(skb);
859 		break;
860 
861 	case ICMPV6_ECHO_REPLY:
862 		success = ping_rcv(skb);
863 		break;
864 
865 	case ICMPV6_PKT_TOOBIG:
866 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
867 		   standard destination cache. Seems, only "advanced"
868 		   destination cache will allow to solve this problem
869 		   --ANK (980726)
870 		 */
871 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
872 			goto discard_it;
873 		hdr = icmp6_hdr(skb);
874 
875 		/*
876 		 *	Drop through to notify
877 		 */
878 
879 	case ICMPV6_DEST_UNREACH:
880 	case ICMPV6_TIME_EXCEED:
881 	case ICMPV6_PARAMPROB:
882 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
883 		break;
884 
885 	case NDISC_ROUTER_SOLICITATION:
886 	case NDISC_ROUTER_ADVERTISEMENT:
887 	case NDISC_NEIGHBOUR_SOLICITATION:
888 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
889 	case NDISC_REDIRECT:
890 		ndisc_rcv(skb);
891 		break;
892 
893 	case ICMPV6_MGM_QUERY:
894 		igmp6_event_query(skb);
895 		break;
896 
897 	case ICMPV6_MGM_REPORT:
898 		igmp6_event_report(skb);
899 		break;
900 
901 	case ICMPV6_MGM_REDUCTION:
902 	case ICMPV6_NI_QUERY:
903 	case ICMPV6_NI_REPLY:
904 	case ICMPV6_MLD2_REPORT:
905 	case ICMPV6_DHAAD_REQUEST:
906 	case ICMPV6_DHAAD_REPLY:
907 	case ICMPV6_MOBILE_PREFIX_SOL:
908 	case ICMPV6_MOBILE_PREFIX_ADV:
909 		break;
910 
911 	default:
912 		/* informational */
913 		if (type & ICMPV6_INFOMSG_MASK)
914 			break;
915 
916 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
917 				    saddr, daddr);
918 
919 		/*
920 		 * error of unknown type.
921 		 * must pass to upper level
922 		 */
923 
924 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
925 	}
926 
927 	/* until the v6 path can be better sorted assume failure and
928 	 * preserve the status quo behaviour for the rest of the paths to here
929 	 */
930 	if (success)
931 		consume_skb(skb);
932 	else
933 		kfree_skb(skb);
934 
935 	return 0;
936 
937 csum_error:
938 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
939 discard_it:
940 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
941 drop_no_count:
942 	kfree_skb(skb);
943 	return 0;
944 }
945 
946 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
947 		      u8 type,
948 		      const struct in6_addr *saddr,
949 		      const struct in6_addr *daddr,
950 		      int oif)
951 {
952 	memset(fl6, 0, sizeof(*fl6));
953 	fl6->saddr = *saddr;
954 	fl6->daddr = *daddr;
955 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
956 	fl6->fl6_icmp_type	= type;
957 	fl6->fl6_icmp_code	= 0;
958 	fl6->flowi6_oif		= oif;
959 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
960 }
961 
962 static int __net_init icmpv6_sk_init(struct net *net)
963 {
964 	struct sock *sk;
965 	int err, i, j;
966 
967 	net->ipv6.icmp_sk =
968 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
969 	if (!net->ipv6.icmp_sk)
970 		return -ENOMEM;
971 
972 	for_each_possible_cpu(i) {
973 		err = inet_ctl_sock_create(&sk, PF_INET6,
974 					   SOCK_RAW, IPPROTO_ICMPV6, net);
975 		if (err < 0) {
976 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
977 			       err);
978 			goto fail;
979 		}
980 
981 		net->ipv6.icmp_sk[i] = sk;
982 
983 		/* Enough space for 2 64K ICMP packets, including
984 		 * sk_buff struct overhead.
985 		 */
986 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
987 	}
988 	return 0;
989 
990  fail:
991 	for (j = 0; j < i; j++)
992 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
993 	kfree(net->ipv6.icmp_sk);
994 	return err;
995 }
996 
997 static void __net_exit icmpv6_sk_exit(struct net *net)
998 {
999 	int i;
1000 
1001 	for_each_possible_cpu(i) {
1002 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
1003 	}
1004 	kfree(net->ipv6.icmp_sk);
1005 }
1006 
1007 static struct pernet_operations icmpv6_sk_ops = {
1008 	.init = icmpv6_sk_init,
1009 	.exit = icmpv6_sk_exit,
1010 };
1011 
1012 int __init icmpv6_init(void)
1013 {
1014 	int err;
1015 
1016 	err = register_pernet_subsys(&icmpv6_sk_ops);
1017 	if (err < 0)
1018 		return err;
1019 
1020 	err = -EAGAIN;
1021 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1022 		goto fail;
1023 
1024 	err = inet6_register_icmp_sender(icmp6_send);
1025 	if (err)
1026 		goto sender_reg_err;
1027 	return 0;
1028 
1029 sender_reg_err:
1030 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1031 fail:
1032 	pr_err("Failed to register ICMP6 protocol\n");
1033 	unregister_pernet_subsys(&icmpv6_sk_ops);
1034 	return err;
1035 }
1036 
1037 void icmpv6_cleanup(void)
1038 {
1039 	inet6_unregister_icmp_sender(icmp6_send);
1040 	unregister_pernet_subsys(&icmpv6_sk_ops);
1041 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1042 }
1043 
1044 
/*
 * Destination Unreachable codes, in code order, mapped to the errno to
 * report and to whether the error is fatal.  Used by
 * icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH	*/
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH	*/
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE	*/
};
1078 
1079 int icmpv6_err_convert(u8 type, u8 code, int *err)
1080 {
1081 	int fatal = 0;
1082 
1083 	*err = EPROTO;
1084 
1085 	switch (type) {
1086 	case ICMPV6_DEST_UNREACH:
1087 		fatal = 1;
1088 		if (code < ARRAY_SIZE(tab_unreach)) {
1089 			*err  = tab_unreach[code].err;
1090 			fatal = tab_unreach[code].fatal;
1091 		}
1092 		break;
1093 
1094 	case ICMPV6_PKT_TOOBIG:
1095 		*err = EMSGSIZE;
1096 		break;
1097 
1098 	case ICMPV6_PARAMPROB:
1099 		*err = EPROTO;
1100 		fatal = 1;
1101 		break;
1102 
1103 	case ICMPV6_TIME_EXCEED:
1104 		*err = EHOSTUNREACH;
1105 		break;
1106 	}
1107 
1108 	return fatal;
1109 }
1110 EXPORT_SYMBOL(icmpv6_err_convert);
1111 
1112 #ifdef CONFIG_SYSCTL
1113 static struct ctl_table ipv6_icmp_table_template[] = {
1114 	{
1115 		.procname	= "ratelimit",
1116 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1117 		.maxlen		= sizeof(int),
1118 		.mode		= 0644,
1119 		.proc_handler	= proc_dointvec_ms_jiffies,
1120 	},
1121 	{ },
1122 };
1123 
1124 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1125 {
1126 	struct ctl_table *table;
1127 
1128 	table = kmemdup(ipv6_icmp_table_template,
1129 			sizeof(ipv6_icmp_table_template),
1130 			GFP_KERNEL);
1131 
1132 	if (table)
1133 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1134 
1135 	return table;
1136 }
1137 #endif
1138