xref: /openbmc/linux/net/ipv6/icmp.c (revision 0bc19985)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
 *	Andi Kleen		:	add rate limits. never reply to an ICMP
 *					error. add more length checks and other fixes.
 *	yoshfuji		:	ensure that a parameter problem is sent
 *					for fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return *this_cpu_ptr(net->ipv6.icmp_sk);
85 }
86 
87 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 
104 	return 0;
105 }
106 
107 static int icmpv6_rcv(struct sk_buff *skb);
108 
109 static const struct inet6_protocol icmpv6_protocol = {
110 	.handler	=	icmpv6_rcv,
111 	.err_handler	=	icmpv6_err,
112 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
113 };
114 
115 /* Called with BH disabled */
116 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
117 {
118 	struct sock *sk;
119 
120 	sk = icmpv6_sk(net);
121 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
122 		/* This can happen if the output path (f.e. SIT or
123 		 * ip6ip6 tunnel) signals dst_link_failure() for an
124 		 * outgoing ICMP6 packet.
125 		 */
126 		return NULL;
127 	}
128 	return sk;
129 }
130 
131 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
132 {
133 	spin_unlock(&sk->sk_lock.slock);
134 }
135 
136 /*
137  * Figure out, may we reply to this packet with icmp error.
138  *
139  * We do not reply, if:
140  *	- it was icmp error message.
141  *	- it is truncated, so that it is known, that protocol is ICMPV6
142  *	  (i.e. in the middle of some exthdr)
143  *
144  *	--ANK (980726)
145  */
146 
147 static bool is_ineligible(const struct sk_buff *skb)
148 {
149 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
150 	int len = skb->len - ptr;
151 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
152 	__be16 frag_off;
153 
154 	if (len < 0)
155 		return true;
156 
157 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
158 	if (ptr < 0)
159 		return false;
160 	if (nexthdr == IPPROTO_ICMPV6) {
161 		u8 _type, *tp;
162 		tp = skb_header_pointer(skb,
163 			ptr+offsetof(struct icmp6hdr, icmp6_type),
164 			sizeof(_type), &_type);
165 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
166 			return true;
167 	}
168 	return false;
169 }
170 
171 static bool icmpv6_mask_allow(struct net *net, int type)
172 {
173 	if (type > ICMPV6_MSG_MAX)
174 		return true;
175 
176 	/* Limit if icmp type is set in ratemask. */
177 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
178 		return true;
179 
180 	return false;
181 }
182 
183 static bool icmpv6_global_allow(struct net *net, int type)
184 {
185 	if (icmpv6_mask_allow(net, type))
186 		return true;
187 
188 	if (icmp_global_allow())
189 		return true;
190 
191 	return false;
192 }
193 
194 /*
195  * Check the ICMP output rate limit
196  */
197 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
198 			       struct flowi6 *fl6)
199 {
200 	struct net *net = sock_net(sk);
201 	struct dst_entry *dst;
202 	bool res = false;
203 
204 	if (icmpv6_mask_allow(net, type))
205 		return true;
206 
207 	/*
208 	 * Look up the output route.
209 	 * XXX: perhaps the expire for routing entries cloned by
210 	 * this lookup should be more aggressive (not longer than timeout).
211 	 */
212 	dst = ip6_route_output(net, sk, fl6);
213 	if (dst->error) {
214 		IP6_INC_STATS(net, ip6_dst_idev(dst),
215 			      IPSTATS_MIB_OUTNOROUTES);
216 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
217 		res = true;
218 	} else {
219 		struct rt6_info *rt = (struct rt6_info *)dst;
220 		int tmo = net->ipv6.sysctl.icmpv6_time;
221 		struct inet_peer *peer;
222 
223 		/* Give more bandwidth to wider prefixes. */
224 		if (rt->rt6i_dst.plen < 128)
225 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
226 
227 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
228 		res = inet_peer_xrlim_allow(peer, tmo);
229 		if (peer)
230 			inet_putpeer(peer);
231 	}
232 	dst_release(dst);
233 	return res;
234 }
235 
236 /*
237  *	an inline helper for the "simple" if statement below
238  *	checks if parameter problem report is caused by an
239  *	unrecognized IPv6 option that has the Option Type
240  *	highest-order two bits set to 10
241  */
242 
243 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
244 {
245 	u8 _optval, *op;
246 
247 	offset += skb_network_offset(skb);
248 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
249 	if (!op)
250 		return true;
251 	return (*op & 0xC0) == 0x80;
252 }
253 
254 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
255 				struct icmp6hdr *thdr, int len)
256 {
257 	struct sk_buff *skb;
258 	struct icmp6hdr *icmp6h;
259 
260 	skb = skb_peek(&sk->sk_write_queue);
261 	if (!skb)
262 		return;
263 
264 	icmp6h = icmp6_hdr(skb);
265 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
266 	icmp6h->icmp6_cksum = 0;
267 
268 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
269 		skb->csum = csum_partial(icmp6h,
270 					sizeof(struct icmp6hdr), skb->csum);
271 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
272 						      &fl6->daddr,
273 						      len, fl6->flowi6_proto,
274 						      skb->csum);
275 	} else {
276 		__wsum tmp_csum = 0;
277 
278 		skb_queue_walk(&sk->sk_write_queue, skb) {
279 			tmp_csum = csum_add(tmp_csum, skb->csum);
280 		}
281 
282 		tmp_csum = csum_partial(icmp6h,
283 					sizeof(struct icmp6hdr), tmp_csum);
284 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
285 						      &fl6->daddr,
286 						      len, fl6->flowi6_proto,
287 						      tmp_csum);
288 	}
289 	ip6_push_pending_frames(sk);
290 }
291 
292 struct icmpv6_msg {
293 	struct sk_buff	*skb;
294 	int		offset;
295 	uint8_t		type;
296 };
297 
298 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
299 {
300 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
301 	struct sk_buff *org_skb = msg->skb;
302 	__wsum csum = 0;
303 
304 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
305 				      to, len, csum);
306 	skb->csum = csum_block_add(skb->csum, csum, odd);
307 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
308 		nf_ct_attach(skb, org_skb);
309 	return 0;
310 }
311 
312 #if IS_ENABLED(CONFIG_IPV6_MIP6)
313 static void mip6_addr_swap(struct sk_buff *skb)
314 {
315 	struct ipv6hdr *iph = ipv6_hdr(skb);
316 	struct inet6_skb_parm *opt = IP6CB(skb);
317 	struct ipv6_destopt_hao *hao;
318 	struct in6_addr tmp;
319 	int off;
320 
321 	if (opt->dsthao) {
322 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
323 		if (likely(off >= 0)) {
324 			hao = (struct ipv6_destopt_hao *)
325 					(skb_network_header(skb) + off);
326 			tmp = iph->saddr;
327 			iph->saddr = hao->addr;
328 			hao->addr = tmp;
329 		}
330 	}
331 }
332 #else
333 static inline void mip6_addr_swap(struct sk_buff *skb) {}
334 #endif
335 
336 static struct dst_entry *icmpv6_route_lookup(struct net *net,
337 					     struct sk_buff *skb,
338 					     struct sock *sk,
339 					     struct flowi6 *fl6)
340 {
341 	struct dst_entry *dst, *dst2;
342 	struct flowi6 fl2;
343 	int err;
344 
345 	err = ip6_dst_lookup(net, sk, &dst, fl6);
346 	if (err)
347 		return ERR_PTR(err);
348 
349 	/*
350 	 * We won't send icmp if the destination is known
351 	 * anycast.
352 	 */
353 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
354 		net_dbg_ratelimited("icmp6_send: acast source\n");
355 		dst_release(dst);
356 		return ERR_PTR(-EINVAL);
357 	}
358 
359 	/* No need to clone since we're just using its address. */
360 	dst2 = dst;
361 
362 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
363 	if (!IS_ERR(dst)) {
364 		if (dst != dst2)
365 			return dst;
366 	} else {
367 		if (PTR_ERR(dst) == -EPERM)
368 			dst = NULL;
369 		else
370 			return dst;
371 	}
372 
373 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
374 	if (err)
375 		goto relookup_failed;
376 
377 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
378 	if (err)
379 		goto relookup_failed;
380 
381 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
382 	if (!IS_ERR(dst2)) {
383 		dst_release(dst);
384 		dst = dst2;
385 	} else {
386 		err = PTR_ERR(dst2);
387 		if (err == -EPERM) {
388 			dst_release(dst);
389 			return dst2;
390 		} else
391 			goto relookup_failed;
392 	}
393 
394 relookup_failed:
395 	if (dst)
396 		return dst;
397 	return ERR_PTR(err);
398 }
399 
400 static int icmp6_iif(const struct sk_buff *skb)
401 {
402 	int iif = skb->dev->ifindex;
403 
404 	/* for local traffic to local address, skb dev is the loopback
405 	 * device. Check if there is a dst attached to the skb and if so
406 	 * get the real device index. Same is needed for replies to a link
407 	 * local address on a device enslaved to an L3 master device
408 	 */
409 	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
410 		const struct rt6_info *rt6 = skb_rt6_info(skb);
411 
412 		if (rt6)
413 			iif = rt6->rt6i_idev->dev->ifindex;
414 	}
415 
416 	return iif;
417 }
418 
419 /*
420  *	Send an ICMP message in response to a packet in error
421  */
422 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
423 		       const struct in6_addr *force_saddr)
424 {
425 	struct inet6_dev *idev = NULL;
426 	struct ipv6hdr *hdr = ipv6_hdr(skb);
427 	struct sock *sk;
428 	struct net *net;
429 	struct ipv6_pinfo *np;
430 	const struct in6_addr *saddr = NULL;
431 	struct dst_entry *dst;
432 	struct icmp6hdr tmp_hdr;
433 	struct flowi6 fl6;
434 	struct icmpv6_msg msg;
435 	struct ipcm6_cookie ipc6;
436 	int iif = 0;
437 	int addr_type = 0;
438 	int len;
439 	u32 mark;
440 
441 	if ((u8 *)hdr < skb->head ||
442 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
443 		return;
444 
445 	if (!skb->dev)
446 		return;
447 	net = dev_net(skb->dev);
448 	mark = IP6_REPLY_MARK(net, skb->mark);
449 	/*
450 	 *	Make sure we respect the rules
451 	 *	i.e. RFC 1885 2.4(e)
452 	 *	Rule (e.1) is enforced by not using icmp6_send
453 	 *	in any code that processes icmp errors.
454 	 */
455 	addr_type = ipv6_addr_type(&hdr->daddr);
456 
457 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
458 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
459 		saddr = &hdr->daddr;
460 
461 	/*
462 	 *	Dest addr check
463 	 */
464 
465 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
466 		if (type != ICMPV6_PKT_TOOBIG &&
467 		    !(type == ICMPV6_PARAMPROB &&
468 		      code == ICMPV6_UNK_OPTION &&
469 		      (opt_unrec(skb, info))))
470 			return;
471 
472 		saddr = NULL;
473 	}
474 
475 	addr_type = ipv6_addr_type(&hdr->saddr);
476 
477 	/*
478 	 *	Source addr check
479 	 */
480 
481 	if (__ipv6_addr_needs_scope_id(addr_type)) {
482 		iif = icmp6_iif(skb);
483 	} else {
484 		dst = skb_dst(skb);
485 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
486 	}
487 
488 	/*
489 	 *	Must not send error if the source does not uniquely
490 	 *	identify a single node (RFC2463 Section 2.4).
491 	 *	We check unspecified / multicast addresses here,
492 	 *	and anycast addresses will be checked later.
493 	 */
494 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
495 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
496 				    &hdr->saddr, &hdr->daddr);
497 		return;
498 	}
499 
500 	/*
501 	 *	Never answer to a ICMP packet.
502 	 */
503 	if (is_ineligible(skb)) {
504 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
505 				    &hdr->saddr, &hdr->daddr);
506 		return;
507 	}
508 
509 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
510 	local_bh_disable();
511 
512 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
513 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
514 		goto out_bh_enable;
515 
516 	mip6_addr_swap(skb);
517 
518 	memset(&fl6, 0, sizeof(fl6));
519 	fl6.flowi6_proto = IPPROTO_ICMPV6;
520 	fl6.daddr = hdr->saddr;
521 	if (force_saddr)
522 		saddr = force_saddr;
523 	if (saddr)
524 		fl6.saddr = *saddr;
525 	fl6.flowi6_mark = mark;
526 	fl6.flowi6_oif = iif;
527 	fl6.fl6_icmp_type = type;
528 	fl6.fl6_icmp_code = code;
529 	fl6.flowi6_uid = sock_net_uid(net, NULL);
530 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
531 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
532 
533 	sk = icmpv6_xmit_lock(net);
534 	if (!sk)
535 		goto out_bh_enable;
536 
537 	sk->sk_mark = mark;
538 	np = inet6_sk(sk);
539 
540 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
541 		goto out;
542 
543 	tmp_hdr.icmp6_type = type;
544 	tmp_hdr.icmp6_code = code;
545 	tmp_hdr.icmp6_cksum = 0;
546 	tmp_hdr.icmp6_pointer = htonl(info);
547 
548 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
549 		fl6.flowi6_oif = np->mcast_oif;
550 	else if (!fl6.flowi6_oif)
551 		fl6.flowi6_oif = np->ucast_oif;
552 
553 	ipcm6_init_sk(&ipc6, np);
554 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
555 
556 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
557 	if (IS_ERR(dst))
558 		goto out;
559 
560 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
561 
562 	msg.skb = skb;
563 	msg.offset = skb_network_offset(skb);
564 	msg.type = type;
565 
566 	len = skb->len - msg.offset;
567 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
568 	if (len < 0) {
569 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
570 				    &hdr->saddr, &hdr->daddr);
571 		goto out_dst_release;
572 	}
573 
574 	rcu_read_lock();
575 	idev = __in6_dev_get(skb->dev);
576 
577 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
578 			    len + sizeof(struct icmp6hdr),
579 			    sizeof(struct icmp6hdr),
580 			    &ipc6, &fl6, (struct rt6_info *)dst,
581 			    MSG_DONTWAIT)) {
582 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
583 		ip6_flush_pending_frames(sk);
584 	} else {
585 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
586 					   len + sizeof(struct icmp6hdr));
587 	}
588 	rcu_read_unlock();
589 out_dst_release:
590 	dst_release(dst);
591 out:
592 	icmpv6_xmit_unlock(sk);
593 out_bh_enable:
594 	local_bh_enable();
595 }
596 
597 /* Slightly more convenient version of icmp6_send.
598  */
599 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
600 {
601 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
602 	kfree_skb(skb);
603 }
604 
605 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
606  * if sufficient data bytes are available
607  * @nhs is the size of the tunnel header(s) :
608  *  Either an IPv4 header for SIT encap
609  *         an IPv4 header + GRE header for GRE encap
610  */
611 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
612 			       unsigned int data_len)
613 {
614 	struct in6_addr temp_saddr;
615 	struct rt6_info *rt;
616 	struct sk_buff *skb2;
617 	u32 info = 0;
618 
619 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
620 		return 1;
621 
622 	/* RFC 4884 (partial) support for ICMP extensions */
623 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
624 		data_len = 0;
625 
626 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
627 
628 	if (!skb2)
629 		return 1;
630 
631 	skb_dst_drop(skb2);
632 	skb_pull(skb2, nhs);
633 	skb_reset_network_header(skb2);
634 
635 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
636 			skb, 0);
637 
638 	if (rt && rt->dst.dev)
639 		skb2->dev = rt->dst.dev;
640 
641 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
642 
643 	if (data_len) {
644 		/* RFC 4884 (partial) support :
645 		 * insert 0 padding at the end, before the extensions
646 		 */
647 		__skb_push(skb2, nhs);
648 		skb_reset_network_header(skb2);
649 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
650 		memset(skb2->data + data_len - nhs, 0, nhs);
651 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
652 		 * and stored in reserved[0]
653 		 */
654 		info = (data_len/8) << 24;
655 	}
656 	if (type == ICMP_TIME_EXCEEDED)
657 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
658 			   info, &temp_saddr);
659 	else
660 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
661 			   info, &temp_saddr);
662 	if (rt)
663 		ip6_rt_put(rt);
664 
665 	kfree_skb(skb2);
666 
667 	return 0;
668 }
669 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
670 
671 static void icmpv6_echo_reply(struct sk_buff *skb)
672 {
673 	struct net *net = dev_net(skb->dev);
674 	struct sock *sk;
675 	struct inet6_dev *idev;
676 	struct ipv6_pinfo *np;
677 	const struct in6_addr *saddr = NULL;
678 	struct icmp6hdr *icmph = icmp6_hdr(skb);
679 	struct icmp6hdr tmp_hdr;
680 	struct flowi6 fl6;
681 	struct icmpv6_msg msg;
682 	struct dst_entry *dst;
683 	struct ipcm6_cookie ipc6;
684 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
685 	bool acast;
686 
687 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
688 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
689 		return;
690 
691 	saddr = &ipv6_hdr(skb)->daddr;
692 
693 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
694 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
695 		return;
696 
697 	if (!ipv6_unicast_destination(skb) &&
698 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
699 		saddr = NULL;
700 
701 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
702 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
703 
704 	memset(&fl6, 0, sizeof(fl6));
705 	fl6.flowi6_proto = IPPROTO_ICMPV6;
706 	fl6.daddr = ipv6_hdr(skb)->saddr;
707 	if (saddr)
708 		fl6.saddr = *saddr;
709 	fl6.flowi6_oif = icmp6_iif(skb);
710 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
711 	fl6.flowi6_mark = mark;
712 	fl6.flowi6_uid = sock_net_uid(net, NULL);
713 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
714 
715 	local_bh_disable();
716 	sk = icmpv6_xmit_lock(net);
717 	if (!sk)
718 		goto out_bh_enable;
719 	sk->sk_mark = mark;
720 	np = inet6_sk(sk);
721 
722 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
723 		fl6.flowi6_oif = np->mcast_oif;
724 	else if (!fl6.flowi6_oif)
725 		fl6.flowi6_oif = np->ucast_oif;
726 
727 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
728 		goto out;
729 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
730 	if (IS_ERR(dst))
731 		goto out;
732 
733 	/* Check the ratelimit */
734 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
735 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
736 		goto out_dst_release;
737 
738 	idev = __in6_dev_get(skb->dev);
739 
740 	msg.skb = skb;
741 	msg.offset = 0;
742 	msg.type = ICMPV6_ECHO_REPLY;
743 
744 	ipcm6_init_sk(&ipc6, np);
745 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
746 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
747 
748 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
749 			    skb->len + sizeof(struct icmp6hdr),
750 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
751 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
752 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
753 		ip6_flush_pending_frames(sk);
754 	} else {
755 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
756 					   skb->len + sizeof(struct icmp6hdr));
757 	}
758 out_dst_release:
759 	dst_release(dst);
760 out:
761 	icmpv6_xmit_unlock(sk);
762 out_bh_enable:
763 	local_bh_enable();
764 }
765 
766 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
767 {
768 	const struct inet6_protocol *ipprot;
769 	int inner_offset;
770 	__be16 frag_off;
771 	u8 nexthdr;
772 	struct net *net = dev_net(skb->dev);
773 
774 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
775 		goto out;
776 
777 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
778 	if (ipv6_ext_hdr(nexthdr)) {
779 		/* now skip over extension headers */
780 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
781 						&nexthdr, &frag_off);
782 		if (inner_offset < 0)
783 			goto out;
784 	} else {
785 		inner_offset = sizeof(struct ipv6hdr);
786 	}
787 
788 	/* Checkin header including 8 bytes of inner protocol header. */
789 	if (!pskb_may_pull(skb, inner_offset+8))
790 		goto out;
791 
792 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
793 	   Without this we will not able f.e. to make source routed
794 	   pmtu discovery.
795 	   Corresponding argument (opt) to notifiers is already added.
796 	   --ANK (980726)
797 	 */
798 
799 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
800 	if (ipprot && ipprot->err_handler)
801 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
802 
803 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
804 	return;
805 
806 out:
807 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
808 }
809 
810 /*
811  *	Handle icmp messages
812  */
813 
814 static int icmpv6_rcv(struct sk_buff *skb)
815 {
816 	struct net *net = dev_net(skb->dev);
817 	struct net_device *dev = skb->dev;
818 	struct inet6_dev *idev = __in6_dev_get(dev);
819 	const struct in6_addr *saddr, *daddr;
820 	struct icmp6hdr *hdr;
821 	u8 type;
822 	bool success = false;
823 
824 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
825 		struct sec_path *sp = skb_sec_path(skb);
826 		int nh;
827 
828 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
829 				 XFRM_STATE_ICMP))
830 			goto drop_no_count;
831 
832 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
833 			goto drop_no_count;
834 
835 		nh = skb_network_offset(skb);
836 		skb_set_network_header(skb, sizeof(*hdr));
837 
838 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
839 			goto drop_no_count;
840 
841 		skb_set_network_header(skb, nh);
842 	}
843 
844 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
845 
846 	saddr = &ipv6_hdr(skb)->saddr;
847 	daddr = &ipv6_hdr(skb)->daddr;
848 
849 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
850 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
851 				    saddr, daddr);
852 		goto csum_error;
853 	}
854 
855 	if (!pskb_pull(skb, sizeof(*hdr)))
856 		goto discard_it;
857 
858 	hdr = icmp6_hdr(skb);
859 
860 	type = hdr->icmp6_type;
861 
862 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
863 
864 	switch (type) {
865 	case ICMPV6_ECHO_REQUEST:
866 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
867 			icmpv6_echo_reply(skb);
868 		break;
869 
870 	case ICMPV6_ECHO_REPLY:
871 		success = ping_rcv(skb);
872 		break;
873 
874 	case ICMPV6_PKT_TOOBIG:
875 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
876 		   standard destination cache. Seems, only "advanced"
877 		   destination cache will allow to solve this problem
878 		   --ANK (980726)
879 		 */
880 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
881 			goto discard_it;
882 		hdr = icmp6_hdr(skb);
883 
884 		/* to notify */
885 		/* fall through */
886 	case ICMPV6_DEST_UNREACH:
887 	case ICMPV6_TIME_EXCEED:
888 	case ICMPV6_PARAMPROB:
889 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
890 		break;
891 
892 	case NDISC_ROUTER_SOLICITATION:
893 	case NDISC_ROUTER_ADVERTISEMENT:
894 	case NDISC_NEIGHBOUR_SOLICITATION:
895 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
896 	case NDISC_REDIRECT:
897 		ndisc_rcv(skb);
898 		break;
899 
900 	case ICMPV6_MGM_QUERY:
901 		igmp6_event_query(skb);
902 		break;
903 
904 	case ICMPV6_MGM_REPORT:
905 		igmp6_event_report(skb);
906 		break;
907 
908 	case ICMPV6_MGM_REDUCTION:
909 	case ICMPV6_NI_QUERY:
910 	case ICMPV6_NI_REPLY:
911 	case ICMPV6_MLD2_REPORT:
912 	case ICMPV6_DHAAD_REQUEST:
913 	case ICMPV6_DHAAD_REPLY:
914 	case ICMPV6_MOBILE_PREFIX_SOL:
915 	case ICMPV6_MOBILE_PREFIX_ADV:
916 		break;
917 
918 	default:
919 		/* informational */
920 		if (type & ICMPV6_INFOMSG_MASK)
921 			break;
922 
923 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
924 				    saddr, daddr);
925 
926 		/*
927 		 * error of unknown type.
928 		 * must pass to upper level
929 		 */
930 
931 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
932 	}
933 
934 	/* until the v6 path can be better sorted assume failure and
935 	 * preserve the status quo behaviour for the rest of the paths to here
936 	 */
937 	if (success)
938 		consume_skb(skb);
939 	else
940 		kfree_skb(skb);
941 
942 	return 0;
943 
944 csum_error:
945 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
946 discard_it:
947 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
948 drop_no_count:
949 	kfree_skb(skb);
950 	return 0;
951 }
952 
953 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
954 		      u8 type,
955 		      const struct in6_addr *saddr,
956 		      const struct in6_addr *daddr,
957 		      int oif)
958 {
959 	memset(fl6, 0, sizeof(*fl6));
960 	fl6->saddr = *saddr;
961 	fl6->daddr = *daddr;
962 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
963 	fl6->fl6_icmp_type	= type;
964 	fl6->fl6_icmp_code	= 0;
965 	fl6->flowi6_oif		= oif;
966 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
967 }
968 
969 static void __net_exit icmpv6_sk_exit(struct net *net)
970 {
971 	int i;
972 
973 	for_each_possible_cpu(i)
974 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
975 	free_percpu(net->ipv6.icmp_sk);
976 }
977 
978 static int __net_init icmpv6_sk_init(struct net *net)
979 {
980 	struct sock *sk;
981 	int err, i;
982 
983 	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
984 	if (!net->ipv6.icmp_sk)
985 		return -ENOMEM;
986 
987 	for_each_possible_cpu(i) {
988 		err = inet_ctl_sock_create(&sk, PF_INET6,
989 					   SOCK_RAW, IPPROTO_ICMPV6, net);
990 		if (err < 0) {
991 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
992 			       err);
993 			goto fail;
994 		}
995 
996 		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
997 
998 		/* Enough space for 2 64K ICMP packets, including
999 		 * sk_buff struct overhead.
1000 		 */
1001 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1002 	}
1003 	return 0;
1004 
1005  fail:
1006 	icmpv6_sk_exit(net);
1007 	return err;
1008 }
1009 
/* Per-network-namespace hooks that create and destroy the ICMPv6 sockets. */
static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};
1014 
1015 int __init icmpv6_init(void)
1016 {
1017 	int err;
1018 
1019 	err = register_pernet_subsys(&icmpv6_sk_ops);
1020 	if (err < 0)
1021 		return err;
1022 
1023 	err = -EAGAIN;
1024 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1025 		goto fail;
1026 
1027 	err = inet6_register_icmp_sender(icmp6_send);
1028 	if (err)
1029 		goto sender_reg_err;
1030 	return 0;
1031 
1032 sender_reg_err:
1033 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1034 fail:
1035 	pr_err("Failed to register ICMP6 protocol\n");
1036 	unregister_pernet_subsys(&icmpv6_sk_ops);
1037 	return err;
1038 }
1039 
/*
 * Undo icmpv6_init(): unregister the ICMP sender, the per-netns socket
 * operations and the ICMPv6 protocol handlers.
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1046 
1047 
/*
 * Mapping of ICMPV6_DEST_UNREACH codes to errno values, indexed by the
 * ICMPv6 code.  @fatal tells whether the error is reported as fatal by
 * icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1081 
1082 int icmpv6_err_convert(u8 type, u8 code, int *err)
1083 {
1084 	int fatal = 0;
1085 
1086 	*err = EPROTO;
1087 
1088 	switch (type) {
1089 	case ICMPV6_DEST_UNREACH:
1090 		fatal = 1;
1091 		if (code < ARRAY_SIZE(tab_unreach)) {
1092 			*err  = tab_unreach[code].err;
1093 			fatal = tab_unreach[code].fatal;
1094 		}
1095 		break;
1096 
1097 	case ICMPV6_PKT_TOOBIG:
1098 		*err = EMSGSIZE;
1099 		break;
1100 
1101 	case ICMPV6_PARAMPROB:
1102 		*err = EPROTO;
1103 		fatal = 1;
1104 		break;
1105 
1106 	case ICMPV6_TIME_EXCEED:
1107 		*err = EHOSTUNREACH;
1108 		break;
1109 	}
1110 
1111 	return fatal;
1112 }
1113 EXPORT_SYMBOL(icmpv6_err_convert);
1114 
1115 #ifdef CONFIG_SYSCTL
/*
 * Template for the per-netns ICMPv6 sysctl table.  The .data pointers
 * refer to init_net here and are redirected to the target namespace by
 * ipv6_icmp_sysctl_init(), which addresses the entries by position: keep
 * the order of the entries in sync with that function.
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		/* [0] backed by sysctl.icmpv6_time */
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		/* [1] when set, echo requests are not answered */
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler = proc_dointvec,
	},
	{
		/* [2] when set, echo requests to multicast are not answered */
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler = proc_dointvec,
	},
	{
		/* [3] when set, echo requests to anycast are not answered */
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler = proc_dointvec,
	},
	{
		/* [4] bitmap of ICMPv6 types subject to rate limiting */
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{ },
};
1154 
1155 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1156 {
1157 	struct ctl_table *table;
1158 
1159 	table = kmemdup(ipv6_icmp_table_template,
1160 			sizeof(ipv6_icmp_table_template),
1161 			GFP_KERNEL);
1162 
1163 	if (table) {
1164 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1165 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1166 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1167 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1168 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1169 	}
1170 	return table;
1171 }
1172 #endif
1173