xref: /openbmc/linux/net/ipv6/icmp.c (revision 16ccca11088c1bdd9311a2c630b453541305c48a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/transp_v6.h>
61 #include <net/ip6_route.h>
62 #include <net/addrconf.h>
63 #include <net/icmp.h>
64 #include <net/xfrm.h>
65 #include <net/inet_common.h>
66 #include <net/dsfield.h>
67 #include <net/l3mdev.h>
68 
69 #include <linux/uaccess.h>
70 
71 /*
72  *	The ICMP socket(s). This is the most convenient way to flow control
73  *	our ICMP output as well as maintain a clean interface throughout
74  *	all layers. All Socketless IP sends will soon be gone.
75  *
76  *	On SMP we have one ICMP socket per-cpu.
77  */
78 static struct sock *icmpv6_sk(struct net *net)
79 {
80 	return this_cpu_read(*net->ipv6.icmp_sk);
81 }
82 
83 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
84 		       u8 type, u8 code, int offset, __be32 info)
85 {
86 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
87 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
88 	struct net *net = dev_net(skb->dev);
89 
90 	if (type == ICMPV6_PKT_TOOBIG)
91 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
92 	else if (type == NDISC_REDIRECT)
93 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
94 			     sock_net_uid(net, NULL));
95 
96 	if (!(type & ICMPV6_INFOMSG_MASK))
97 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
98 			ping_err(skb, offset, ntohl(info));
99 
100 	return 0;
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 /* Called with BH disabled */
112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113 {
114 	struct sock *sk;
115 
116 	sk = icmpv6_sk(net);
117 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118 		/* This can happen if the output path (f.e. SIT or
119 		 * ip6ip6 tunnel) signals dst_link_failure() for an
120 		 * outgoing ICMP6 packet.
121 		 */
122 		return NULL;
123 	}
124 	return sk;
125 }
126 
127 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
128 {
129 	spin_unlock(&sk->sk_lock.slock);
130 }
131 
132 /*
133  * Figure out, may we reply to this packet with icmp error.
134  *
135  * We do not reply, if:
136  *	- it was icmp error message.
137  *	- it is truncated, so that it is known, that protocol is ICMPV6
138  *	  (i.e. in the middle of some exthdr)
139  *
140  *	--ANK (980726)
141  */
142 
143 static bool is_ineligible(const struct sk_buff *skb)
144 {
145 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
146 	int len = skb->len - ptr;
147 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
148 	__be16 frag_off;
149 
150 	if (len < 0)
151 		return true;
152 
153 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
154 	if (ptr < 0)
155 		return false;
156 	if (nexthdr == IPPROTO_ICMPV6) {
157 		u8 _type, *tp;
158 		tp = skb_header_pointer(skb,
159 			ptr+offsetof(struct icmp6hdr, icmp6_type),
160 			sizeof(_type), &_type);
161 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
162 			return true;
163 	}
164 	return false;
165 }
166 
167 static bool icmpv6_mask_allow(struct net *net, int type)
168 {
169 	if (type > ICMPV6_MSG_MAX)
170 		return true;
171 
172 	/* Limit if icmp type is set in ratemask. */
173 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
174 		return true;
175 
176 	return false;
177 }
178 
179 static bool icmpv6_global_allow(struct net *net, int type)
180 {
181 	if (icmpv6_mask_allow(net, type))
182 		return true;
183 
184 	if (icmp_global_allow())
185 		return true;
186 
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	dst_release(dst);
229 	return res;
230 }
231 
232 /*
233  *	an inline helper for the "simple" if statement below
234  *	checks if parameter problem report is caused by an
235  *	unrecognized IPv6 option that has the Option Type
236  *	highest-order two bits set to 10
237  */
238 
239 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
240 {
241 	u8 _optval, *op;
242 
243 	offset += skb_network_offset(skb);
244 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
245 	if (!op)
246 		return true;
247 	return (*op & 0xC0) == 0x80;
248 }
249 
250 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
251 				struct icmp6hdr *thdr, int len)
252 {
253 	struct sk_buff *skb;
254 	struct icmp6hdr *icmp6h;
255 
256 	skb = skb_peek(&sk->sk_write_queue);
257 	if (!skb)
258 		return;
259 
260 	icmp6h = icmp6_hdr(skb);
261 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
262 	icmp6h->icmp6_cksum = 0;
263 
264 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
265 		skb->csum = csum_partial(icmp6h,
266 					sizeof(struct icmp6hdr), skb->csum);
267 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
268 						      &fl6->daddr,
269 						      len, fl6->flowi6_proto,
270 						      skb->csum);
271 	} else {
272 		__wsum tmp_csum = 0;
273 
274 		skb_queue_walk(&sk->sk_write_queue, skb) {
275 			tmp_csum = csum_add(tmp_csum, skb->csum);
276 		}
277 
278 		tmp_csum = csum_partial(icmp6h,
279 					sizeof(struct icmp6hdr), tmp_csum);
280 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
281 						      &fl6->daddr,
282 						      len, fl6->flowi6_proto,
283 						      tmp_csum);
284 	}
285 	ip6_push_pending_frames(sk);
286 }
287 
/* Context handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
	struct sk_buff *skb;	/* packet the payload bytes are copied from */
	int offset;		/* base offset into skb for the copy */
	uint8_t type;		/* ICMPv6 type of the message being built */
};
293 
294 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
295 {
296 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
297 	struct sk_buff *org_skb = msg->skb;
298 	__wsum csum = 0;
299 
300 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
301 				      to, len, csum);
302 	skb->csum = csum_block_add(skb->csum, csum, odd);
303 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
304 		nf_ct_attach(skb, org_skb);
305 	return 0;
306 }
307 
308 #if IS_ENABLED(CONFIG_IPV6_MIP6)
309 static void mip6_addr_swap(struct sk_buff *skb)
310 {
311 	struct ipv6hdr *iph = ipv6_hdr(skb);
312 	struct inet6_skb_parm *opt = IP6CB(skb);
313 	struct ipv6_destopt_hao *hao;
314 	struct in6_addr tmp;
315 	int off;
316 
317 	if (opt->dsthao) {
318 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
319 		if (likely(off >= 0)) {
320 			hao = (struct ipv6_destopt_hao *)
321 					(skb_network_header(skb) + off);
322 			tmp = iph->saddr;
323 			iph->saddr = hao->addr;
324 			hao->addr = tmp;
325 		}
326 	}
327 }
328 #else
329 static inline void mip6_addr_swap(struct sk_buff *skb) {}
330 #endif
331 
332 static struct dst_entry *icmpv6_route_lookup(struct net *net,
333 					     struct sk_buff *skb,
334 					     struct sock *sk,
335 					     struct flowi6 *fl6)
336 {
337 	struct dst_entry *dst, *dst2;
338 	struct flowi6 fl2;
339 	int err;
340 
341 	err = ip6_dst_lookup(net, sk, &dst, fl6);
342 	if (err)
343 		return ERR_PTR(err);
344 
345 	/*
346 	 * We won't send icmp if the destination is known
347 	 * anycast.
348 	 */
349 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
350 		net_dbg_ratelimited("icmp6_send: acast source\n");
351 		dst_release(dst);
352 		return ERR_PTR(-EINVAL);
353 	}
354 
355 	/* No need to clone since we're just using its address. */
356 	dst2 = dst;
357 
358 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
359 	if (!IS_ERR(dst)) {
360 		if (dst != dst2)
361 			return dst;
362 	} else {
363 		if (PTR_ERR(dst) == -EPERM)
364 			dst = NULL;
365 		else
366 			return dst;
367 	}
368 
369 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
370 	if (err)
371 		goto relookup_failed;
372 
373 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
374 	if (err)
375 		goto relookup_failed;
376 
377 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
378 	if (!IS_ERR(dst2)) {
379 		dst_release(dst);
380 		dst = dst2;
381 	} else {
382 		err = PTR_ERR(dst2);
383 		if (err == -EPERM) {
384 			dst_release(dst);
385 			return dst2;
386 		} else
387 			goto relookup_failed;
388 	}
389 
390 relookup_failed:
391 	if (dst)
392 		return dst;
393 	return ERR_PTR(err);
394 }
395 
396 static struct net_device *icmp6_dev(const struct sk_buff *skb)
397 {
398 	struct net_device *dev = skb->dev;
399 
400 	/* for local traffic to local address, skb dev is the loopback
401 	 * device. Check if there is a dst attached to the skb and if so
402 	 * get the real device index. Same is needed for replies to a link
403 	 * local address on a device enslaved to an L3 master device
404 	 */
405 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
406 		const struct rt6_info *rt6 = skb_rt6_info(skb);
407 
408 		if (rt6)
409 			dev = rt6->rt6i_idev->dev;
410 	}
411 
412 	return dev;
413 }
414 
415 static int icmp6_iif(const struct sk_buff *skb)
416 {
417 	return icmp6_dev(skb)->ifindex;
418 }
419 
420 /*
421  *	Send an ICMP message in response to a packet in error
422  */
423 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
424 		       const struct in6_addr *force_saddr)
425 {
426 	struct inet6_dev *idev = NULL;
427 	struct ipv6hdr *hdr = ipv6_hdr(skb);
428 	struct sock *sk;
429 	struct net *net;
430 	struct ipv6_pinfo *np;
431 	const struct in6_addr *saddr = NULL;
432 	struct dst_entry *dst;
433 	struct icmp6hdr tmp_hdr;
434 	struct flowi6 fl6;
435 	struct icmpv6_msg msg;
436 	struct ipcm6_cookie ipc6;
437 	int iif = 0;
438 	int addr_type = 0;
439 	int len;
440 	u32 mark;
441 
442 	if ((u8 *)hdr < skb->head ||
443 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
444 		return;
445 
446 	if (!skb->dev)
447 		return;
448 	net = dev_net(skb->dev);
449 	mark = IP6_REPLY_MARK(net, skb->mark);
450 	/*
451 	 *	Make sure we respect the rules
452 	 *	i.e. RFC 1885 2.4(e)
453 	 *	Rule (e.1) is enforced by not using icmp6_send
454 	 *	in any code that processes icmp errors.
455 	 */
456 	addr_type = ipv6_addr_type(&hdr->daddr);
457 
458 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
459 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
460 		saddr = &hdr->daddr;
461 
462 	/*
463 	 *	Dest addr check
464 	 */
465 
466 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
467 		if (type != ICMPV6_PKT_TOOBIG &&
468 		    !(type == ICMPV6_PARAMPROB &&
469 		      code == ICMPV6_UNK_OPTION &&
470 		      (opt_unrec(skb, info))))
471 			return;
472 
473 		saddr = NULL;
474 	}
475 
476 	addr_type = ipv6_addr_type(&hdr->saddr);
477 
478 	/*
479 	 *	Source addr check
480 	 */
481 
482 	if (__ipv6_addr_needs_scope_id(addr_type)) {
483 		iif = icmp6_iif(skb);
484 	} else {
485 		dst = skb_dst(skb);
486 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
487 	}
488 
489 	/*
490 	 *	Must not send error if the source does not uniquely
491 	 *	identify a single node (RFC2463 Section 2.4).
492 	 *	We check unspecified / multicast addresses here,
493 	 *	and anycast addresses will be checked later.
494 	 */
495 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
496 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
497 				    &hdr->saddr, &hdr->daddr);
498 		return;
499 	}
500 
501 	/*
502 	 *	Never answer to a ICMP packet.
503 	 */
504 	if (is_ineligible(skb)) {
505 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
506 				    &hdr->saddr, &hdr->daddr);
507 		return;
508 	}
509 
510 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
511 	local_bh_disable();
512 
513 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
514 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
515 		goto out_bh_enable;
516 
517 	mip6_addr_swap(skb);
518 
519 	memset(&fl6, 0, sizeof(fl6));
520 	fl6.flowi6_proto = IPPROTO_ICMPV6;
521 	fl6.daddr = hdr->saddr;
522 	if (force_saddr)
523 		saddr = force_saddr;
524 	if (saddr)
525 		fl6.saddr = *saddr;
526 	fl6.flowi6_mark = mark;
527 	fl6.flowi6_oif = iif;
528 	fl6.fl6_icmp_type = type;
529 	fl6.fl6_icmp_code = code;
530 	fl6.flowi6_uid = sock_net_uid(net, NULL);
531 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
532 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
533 
534 	sk = icmpv6_xmit_lock(net);
535 	if (!sk)
536 		goto out_bh_enable;
537 
538 	sk->sk_mark = mark;
539 	np = inet6_sk(sk);
540 
541 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
542 		goto out;
543 
544 	tmp_hdr.icmp6_type = type;
545 	tmp_hdr.icmp6_code = code;
546 	tmp_hdr.icmp6_cksum = 0;
547 	tmp_hdr.icmp6_pointer = htonl(info);
548 
549 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
550 		fl6.flowi6_oif = np->mcast_oif;
551 	else if (!fl6.flowi6_oif)
552 		fl6.flowi6_oif = np->ucast_oif;
553 
554 	ipcm6_init_sk(&ipc6, np);
555 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
556 
557 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
558 	if (IS_ERR(dst))
559 		goto out;
560 
561 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
562 
563 	msg.skb = skb;
564 	msg.offset = skb_network_offset(skb);
565 	msg.type = type;
566 
567 	len = skb->len - msg.offset;
568 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
569 	if (len < 0) {
570 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
571 				    &hdr->saddr, &hdr->daddr);
572 		goto out_dst_release;
573 	}
574 
575 	rcu_read_lock();
576 	idev = __in6_dev_get(skb->dev);
577 
578 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
579 			    len + sizeof(struct icmp6hdr),
580 			    sizeof(struct icmp6hdr),
581 			    &ipc6, &fl6, (struct rt6_info *)dst,
582 			    MSG_DONTWAIT)) {
583 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
584 		ip6_flush_pending_frames(sk);
585 	} else {
586 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
587 					   len + sizeof(struct icmp6hdr));
588 	}
589 	rcu_read_unlock();
590 out_dst_release:
591 	dst_release(dst);
592 out:
593 	icmpv6_xmit_unlock(sk);
594 out_bh_enable:
595 	local_bh_enable();
596 }
597 
598 /* Slightly more convenient version of icmp6_send.
599  */
600 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
601 {
602 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
603 	kfree_skb(skb);
604 }
605 
606 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
607  * if sufficient data bytes are available
608  * @nhs is the size of the tunnel header(s) :
609  *  Either an IPv4 header for SIT encap
610  *         an IPv4 header + GRE header for GRE encap
611  */
612 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
613 			       unsigned int data_len)
614 {
615 	struct in6_addr temp_saddr;
616 	struct rt6_info *rt;
617 	struct sk_buff *skb2;
618 	u32 info = 0;
619 
620 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
621 		return 1;
622 
623 	/* RFC 4884 (partial) support for ICMP extensions */
624 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
625 		data_len = 0;
626 
627 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
628 
629 	if (!skb2)
630 		return 1;
631 
632 	skb_dst_drop(skb2);
633 	skb_pull(skb2, nhs);
634 	skb_reset_network_header(skb2);
635 
636 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
637 			skb, 0);
638 
639 	if (rt && rt->dst.dev)
640 		skb2->dev = rt->dst.dev;
641 
642 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
643 
644 	if (data_len) {
645 		/* RFC 4884 (partial) support :
646 		 * insert 0 padding at the end, before the extensions
647 		 */
648 		__skb_push(skb2, nhs);
649 		skb_reset_network_header(skb2);
650 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
651 		memset(skb2->data + data_len - nhs, 0, nhs);
652 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
653 		 * and stored in reserved[0]
654 		 */
655 		info = (data_len/8) << 24;
656 	}
657 	if (type == ICMP_TIME_EXCEEDED)
658 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
659 			   info, &temp_saddr);
660 	else
661 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
662 			   info, &temp_saddr);
663 	if (rt)
664 		ip6_rt_put(rt);
665 
666 	kfree_skb(skb2);
667 
668 	return 0;
669 }
670 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
671 
672 static void icmpv6_echo_reply(struct sk_buff *skb)
673 {
674 	struct net *net = dev_net(skb->dev);
675 	struct sock *sk;
676 	struct inet6_dev *idev;
677 	struct ipv6_pinfo *np;
678 	const struct in6_addr *saddr = NULL;
679 	struct icmp6hdr *icmph = icmp6_hdr(skb);
680 	struct icmp6hdr tmp_hdr;
681 	struct flowi6 fl6;
682 	struct icmpv6_msg msg;
683 	struct dst_entry *dst;
684 	struct ipcm6_cookie ipc6;
685 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
686 	bool acast;
687 
688 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
689 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
690 		return;
691 
692 	saddr = &ipv6_hdr(skb)->daddr;
693 
694 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
695 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
696 		return;
697 
698 	if (!ipv6_unicast_destination(skb) &&
699 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
700 		saddr = NULL;
701 
702 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
703 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
704 
705 	memset(&fl6, 0, sizeof(fl6));
706 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
707 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
708 
709 	fl6.flowi6_proto = IPPROTO_ICMPV6;
710 	fl6.daddr = ipv6_hdr(skb)->saddr;
711 	if (saddr)
712 		fl6.saddr = *saddr;
713 	fl6.flowi6_oif = icmp6_iif(skb);
714 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
715 	fl6.flowi6_mark = mark;
716 	fl6.flowi6_uid = sock_net_uid(net, NULL);
717 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
718 
719 	local_bh_disable();
720 	sk = icmpv6_xmit_lock(net);
721 	if (!sk)
722 		goto out_bh_enable;
723 	sk->sk_mark = mark;
724 	np = inet6_sk(sk);
725 
726 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
727 		fl6.flowi6_oif = np->mcast_oif;
728 	else if (!fl6.flowi6_oif)
729 		fl6.flowi6_oif = np->ucast_oif;
730 
731 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
732 		goto out;
733 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
734 	if (IS_ERR(dst))
735 		goto out;
736 
737 	/* Check the ratelimit */
738 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
739 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
740 		goto out_dst_release;
741 
742 	idev = __in6_dev_get(skb->dev);
743 
744 	msg.skb = skb;
745 	msg.offset = 0;
746 	msg.type = ICMPV6_ECHO_REPLY;
747 
748 	ipcm6_init_sk(&ipc6, np);
749 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
750 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
751 
752 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
753 			    skb->len + sizeof(struct icmp6hdr),
754 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
755 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
756 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
757 		ip6_flush_pending_frames(sk);
758 	} else {
759 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
760 					   skb->len + sizeof(struct icmp6hdr));
761 	}
762 out_dst_release:
763 	dst_release(dst);
764 out:
765 	icmpv6_xmit_unlock(sk);
766 out_bh_enable:
767 	local_bh_enable();
768 }
769 
770 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
771 {
772 	const struct inet6_protocol *ipprot;
773 	int inner_offset;
774 	__be16 frag_off;
775 	u8 nexthdr;
776 	struct net *net = dev_net(skb->dev);
777 
778 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
779 		goto out;
780 
781 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
782 	if (ipv6_ext_hdr(nexthdr)) {
783 		/* now skip over extension headers */
784 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
785 						&nexthdr, &frag_off);
786 		if (inner_offset < 0)
787 			goto out;
788 	} else {
789 		inner_offset = sizeof(struct ipv6hdr);
790 	}
791 
792 	/* Checkin header including 8 bytes of inner protocol header. */
793 	if (!pskb_may_pull(skb, inner_offset+8))
794 		goto out;
795 
796 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
797 	   Without this we will not able f.e. to make source routed
798 	   pmtu discovery.
799 	   Corresponding argument (opt) to notifiers is already added.
800 	   --ANK (980726)
801 	 */
802 
803 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
804 	if (ipprot && ipprot->err_handler)
805 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
806 
807 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
808 	return;
809 
810 out:
811 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
812 }
813 
814 /*
815  *	Handle icmp messages
816  */
817 
818 static int icmpv6_rcv(struct sk_buff *skb)
819 {
820 	struct net *net = dev_net(skb->dev);
821 	struct net_device *dev = icmp6_dev(skb);
822 	struct inet6_dev *idev = __in6_dev_get(dev);
823 	const struct in6_addr *saddr, *daddr;
824 	struct icmp6hdr *hdr;
825 	u8 type;
826 	bool success = false;
827 
828 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
829 		struct sec_path *sp = skb_sec_path(skb);
830 		int nh;
831 
832 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
833 				 XFRM_STATE_ICMP))
834 			goto drop_no_count;
835 
836 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
837 			goto drop_no_count;
838 
839 		nh = skb_network_offset(skb);
840 		skb_set_network_header(skb, sizeof(*hdr));
841 
842 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
843 			goto drop_no_count;
844 
845 		skb_set_network_header(skb, nh);
846 	}
847 
848 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
849 
850 	saddr = &ipv6_hdr(skb)->saddr;
851 	daddr = &ipv6_hdr(skb)->daddr;
852 
853 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
854 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
855 				    saddr, daddr);
856 		goto csum_error;
857 	}
858 
859 	if (!pskb_pull(skb, sizeof(*hdr)))
860 		goto discard_it;
861 
862 	hdr = icmp6_hdr(skb);
863 
864 	type = hdr->icmp6_type;
865 
866 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
867 
868 	switch (type) {
869 	case ICMPV6_ECHO_REQUEST:
870 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
871 			icmpv6_echo_reply(skb);
872 		break;
873 
874 	case ICMPV6_ECHO_REPLY:
875 		success = ping_rcv(skb);
876 		break;
877 
878 	case ICMPV6_PKT_TOOBIG:
879 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
880 		   standard destination cache. Seems, only "advanced"
881 		   destination cache will allow to solve this problem
882 		   --ANK (980726)
883 		 */
884 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
885 			goto discard_it;
886 		hdr = icmp6_hdr(skb);
887 
888 		/* to notify */
889 		/* fall through */
890 	case ICMPV6_DEST_UNREACH:
891 	case ICMPV6_TIME_EXCEED:
892 	case ICMPV6_PARAMPROB:
893 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
894 		break;
895 
896 	case NDISC_ROUTER_SOLICITATION:
897 	case NDISC_ROUTER_ADVERTISEMENT:
898 	case NDISC_NEIGHBOUR_SOLICITATION:
899 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
900 	case NDISC_REDIRECT:
901 		ndisc_rcv(skb);
902 		break;
903 
904 	case ICMPV6_MGM_QUERY:
905 		igmp6_event_query(skb);
906 		break;
907 
908 	case ICMPV6_MGM_REPORT:
909 		igmp6_event_report(skb);
910 		break;
911 
912 	case ICMPV6_MGM_REDUCTION:
913 	case ICMPV6_NI_QUERY:
914 	case ICMPV6_NI_REPLY:
915 	case ICMPV6_MLD2_REPORT:
916 	case ICMPV6_DHAAD_REQUEST:
917 	case ICMPV6_DHAAD_REPLY:
918 	case ICMPV6_MOBILE_PREFIX_SOL:
919 	case ICMPV6_MOBILE_PREFIX_ADV:
920 		break;
921 
922 	default:
923 		/* informational */
924 		if (type & ICMPV6_INFOMSG_MASK)
925 			break;
926 
927 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
928 				    saddr, daddr);
929 
930 		/*
931 		 * error of unknown type.
932 		 * must pass to upper level
933 		 */
934 
935 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
936 	}
937 
938 	/* until the v6 path can be better sorted assume failure and
939 	 * preserve the status quo behaviour for the rest of the paths to here
940 	 */
941 	if (success)
942 		consume_skb(skb);
943 	else
944 		kfree_skb(skb);
945 
946 	return 0;
947 
948 csum_error:
949 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
950 discard_it:
951 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
952 drop_no_count:
953 	kfree_skb(skb);
954 	return 0;
955 }
956 
957 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
958 		      u8 type,
959 		      const struct in6_addr *saddr,
960 		      const struct in6_addr *daddr,
961 		      int oif)
962 {
963 	memset(fl6, 0, sizeof(*fl6));
964 	fl6->saddr = *saddr;
965 	fl6->daddr = *daddr;
966 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
967 	fl6->fl6_icmp_type	= type;
968 	fl6->fl6_icmp_code	= 0;
969 	fl6->flowi6_oif		= oif;
970 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
971 }
972 
973 static void __net_exit icmpv6_sk_exit(struct net *net)
974 {
975 	int i;
976 
977 	for_each_possible_cpu(i)
978 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
979 	free_percpu(net->ipv6.icmp_sk);
980 }
981 
982 static int __net_init icmpv6_sk_init(struct net *net)
983 {
984 	struct sock *sk;
985 	int err, i;
986 
987 	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
988 	if (!net->ipv6.icmp_sk)
989 		return -ENOMEM;
990 
991 	for_each_possible_cpu(i) {
992 		err = inet_ctl_sock_create(&sk, PF_INET6,
993 					   SOCK_RAW, IPPROTO_ICMPV6, net);
994 		if (err < 0) {
995 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
996 			       err);
997 			goto fail;
998 		}
999 
1000 		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1001 
1002 		/* Enough space for 2 64K ICMP packets, including
1003 		 * sk_buff struct overhead.
1004 		 */
1005 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1006 	}
1007 	return 0;
1008 
1009  fail:
1010 	icmpv6_sk_exit(net);
1011 	return err;
1012 }
1013 
1014 static struct pernet_operations icmpv6_sk_ops = {
1015 	.init = icmpv6_sk_init,
1016 	.exit = icmpv6_sk_exit,
1017 };
1018 
1019 int __init icmpv6_init(void)
1020 {
1021 	int err;
1022 
1023 	err = register_pernet_subsys(&icmpv6_sk_ops);
1024 	if (err < 0)
1025 		return err;
1026 
1027 	err = -EAGAIN;
1028 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1029 		goto fail;
1030 
1031 	err = inet6_register_icmp_sender(icmp6_send);
1032 	if (err)
1033 		goto sender_reg_err;
1034 	return 0;
1035 
1036 sender_reg_err:
1037 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1038 fail:
1039 	pr_err("Failed to register ICMP6 protocol\n");
1040 	unregister_pernet_subsys(&icmpv6_sk_ops);
1041 	return err;
1042 }
1043 
1044 void icmpv6_cleanup(void)
1045 {
1046 	inet6_unregister_icmp_sender(icmp6_send);
1047 	unregister_pernet_subsys(&icmpv6_sk_ops);
1048 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1049 }
1050 
1051 
/*
 * errno value and "fatal" flag for each ICMPV6_DEST_UNREACH code; the
 * array is indexed by the code (see icmpv6_err_convert()).
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1085 
1086 int icmpv6_err_convert(u8 type, u8 code, int *err)
1087 {
1088 	int fatal = 0;
1089 
1090 	*err = EPROTO;
1091 
1092 	switch (type) {
1093 	case ICMPV6_DEST_UNREACH:
1094 		fatal = 1;
1095 		if (code < ARRAY_SIZE(tab_unreach)) {
1096 			*err  = tab_unreach[code].err;
1097 			fatal = tab_unreach[code].fatal;
1098 		}
1099 		break;
1100 
1101 	case ICMPV6_PKT_TOOBIG:
1102 		*err = EMSGSIZE;
1103 		break;
1104 
1105 	case ICMPV6_PARAMPROB:
1106 		*err = EPROTO;
1107 		fatal = 1;
1108 		break;
1109 
1110 	case ICMPV6_TIME_EXCEED:
1111 		*err = EHOSTUNREACH;
1112 		break;
1113 	}
1114 
1115 	return fatal;
1116 }
1117 EXPORT_SYMBOL(icmpv6_err_convert);
1118 
1119 #ifdef CONFIG_SYSCTL
1120 static struct ctl_table ipv6_icmp_table_template[] = {
1121 	{
1122 		.procname	= "ratelimit",
1123 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1124 		.maxlen		= sizeof(int),
1125 		.mode		= 0644,
1126 		.proc_handler	= proc_dointvec_ms_jiffies,
1127 	},
1128 	{
1129 		.procname	= "echo_ignore_all",
1130 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1131 		.maxlen		= sizeof(int),
1132 		.mode		= 0644,
1133 		.proc_handler = proc_dointvec,
1134 	},
1135 	{
1136 		.procname	= "echo_ignore_multicast",
1137 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1138 		.maxlen		= sizeof(int),
1139 		.mode		= 0644,
1140 		.proc_handler = proc_dointvec,
1141 	},
1142 	{
1143 		.procname	= "echo_ignore_anycast",
1144 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1145 		.maxlen		= sizeof(int),
1146 		.mode		= 0644,
1147 		.proc_handler = proc_dointvec,
1148 	},
1149 	{
1150 		.procname	= "ratemask",
1151 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1152 		.maxlen		= ICMPV6_MSG_MAX + 1,
1153 		.mode		= 0644,
1154 		.proc_handler = proc_do_large_bitmap,
1155 	},
1156 	{ },
1157 };
1158 
1159 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1160 {
1161 	struct ctl_table *table;
1162 
1163 	table = kmemdup(ipv6_icmp_table_template,
1164 			sizeof(ipv6_icmp_table_template),
1165 			GFP_KERNEL);
1166 
1167 	if (table) {
1168 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1169 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1170 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1171 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1172 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1173 	}
1174 	return table;
1175 }
1176 #endif
1177