xref: /openbmc/linux/net/ipv6/icmp.c (revision 87fcfa7b7fe6bf819033fe827a27f710e38639b5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Internet Control Message Protocol (ICMPv6)
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on net/ipv4/icmp.c
10  *
11  *	RFC 1885
12  */
13 
14 /*
15  *	Changes:
16  *
17  *	Andi Kleen		:	exception handling
18  *	Andi Kleen			add rate limits. never reply to a icmp.
19  *					add more length checks and other fixes.
20  *	yoshfuji		:	ensure to sent parameter problem for
21  *					fragments.
22  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23  *	Randy Dunlap and
24  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27 
28 #define pr_fmt(fmt) "IPv6: " fmt
29 
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42 
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46 
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50 
51 #include <net/ip.h>
52 #include <net/sock.h>
53 
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/transp_v6.h>
61 #include <net/ip6_route.h>
62 #include <net/addrconf.h>
63 #include <net/icmp.h>
64 #include <net/xfrm.h>
65 #include <net/inet_common.h>
66 #include <net/dsfield.h>
67 #include <net/l3mdev.h>
68 
69 #include <linux/uaccess.h>
70 
71 /*
72  *	The ICMP socket(s). This is the most convenient way to flow control
73  *	our ICMP output as well as maintain a clean interface throughout
74  *	all layers. All Socketless IP sends will soon be gone.
75  *
76  *	On SMP we have one ICMP socket per-cpu.
77  */
78 static struct sock *icmpv6_sk(struct net *net)
79 {
80 	return this_cpu_read(*net->ipv6.icmp_sk);
81 }
82 
83 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
84 		       u8 type, u8 code, int offset, __be32 info)
85 {
86 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
87 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
88 	struct net *net = dev_net(skb->dev);
89 
90 	if (type == ICMPV6_PKT_TOOBIG)
91 		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
92 	else if (type == NDISC_REDIRECT)
93 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
94 			     sock_net_uid(net, NULL));
95 
96 	if (!(type & ICMPV6_INFOMSG_MASK))
97 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
98 			ping_err(skb, offset, ntohl(info));
99 
100 	return 0;
101 }
102 
103 static int icmpv6_rcv(struct sk_buff *skb);
104 
105 static const struct inet6_protocol icmpv6_protocol = {
106 	.handler	=	icmpv6_rcv,
107 	.err_handler	=	icmpv6_err,
108 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
109 };
110 
111 /* Called with BH disabled */
112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113 {
114 	struct sock *sk;
115 
116 	sk = icmpv6_sk(net);
117 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118 		/* This can happen if the output path (f.e. SIT or
119 		 * ip6ip6 tunnel) signals dst_link_failure() for an
120 		 * outgoing ICMP6 packet.
121 		 */
122 		return NULL;
123 	}
124 	return sk;
125 }
126 
127 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
128 {
129 	spin_unlock(&sk->sk_lock.slock);
130 }
131 
132 /*
133  * Figure out, may we reply to this packet with icmp error.
134  *
135  * We do not reply, if:
136  *	- it was icmp error message.
137  *	- it is truncated, so that it is known, that protocol is ICMPV6
138  *	  (i.e. in the middle of some exthdr)
139  *
140  *	--ANK (980726)
141  */
142 
143 static bool is_ineligible(const struct sk_buff *skb)
144 {
145 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
146 	int len = skb->len - ptr;
147 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
148 	__be16 frag_off;
149 
150 	if (len < 0)
151 		return true;
152 
153 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
154 	if (ptr < 0)
155 		return false;
156 	if (nexthdr == IPPROTO_ICMPV6) {
157 		u8 _type, *tp;
158 		tp = skb_header_pointer(skb,
159 			ptr+offsetof(struct icmp6hdr, icmp6_type),
160 			sizeof(_type), &_type);
161 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
162 			return true;
163 	}
164 	return false;
165 }
166 
167 static bool icmpv6_mask_allow(struct net *net, int type)
168 {
169 	if (type > ICMPV6_MSG_MAX)
170 		return true;
171 
172 	/* Limit if icmp type is set in ratemask. */
173 	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
174 		return true;
175 
176 	return false;
177 }
178 
179 static bool icmpv6_global_allow(struct net *net, int type)
180 {
181 	if (icmpv6_mask_allow(net, type))
182 		return true;
183 
184 	if (icmp_global_allow())
185 		return true;
186 
187 	return false;
188 }
189 
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194 			       struct flowi6 *fl6)
195 {
196 	struct net *net = sock_net(sk);
197 	struct dst_entry *dst;
198 	bool res = false;
199 
200 	if (icmpv6_mask_allow(net, type))
201 		return true;
202 
203 	/*
204 	 * Look up the output route.
205 	 * XXX: perhaps the expire for routing entries cloned by
206 	 * this lookup should be more aggressive (not longer than timeout).
207 	 */
208 	dst = ip6_route_output(net, sk, fl6);
209 	if (dst->error) {
210 		IP6_INC_STATS(net, ip6_dst_idev(dst),
211 			      IPSTATS_MIB_OUTNOROUTES);
212 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213 		res = true;
214 	} else {
215 		struct rt6_info *rt = (struct rt6_info *)dst;
216 		int tmo = net->ipv6.sysctl.icmpv6_time;
217 		struct inet_peer *peer;
218 
219 		/* Give more bandwidth to wider prefixes. */
220 		if (rt->rt6i_dst.plen < 128)
221 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222 
223 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224 		res = inet_peer_xrlim_allow(peer, tmo);
225 		if (peer)
226 			inet_putpeer(peer);
227 	}
228 	dst_release(dst);
229 	return res;
230 }
231 
232 /*
233  *	an inline helper for the "simple" if statement below
234  *	checks if parameter problem report is caused by an
235  *	unrecognized IPv6 option that has the Option Type
236  *	highest-order two bits set to 10
237  */
238 
239 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
240 {
241 	u8 _optval, *op;
242 
243 	offset += skb_network_offset(skb);
244 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
245 	if (!op)
246 		return true;
247 	return (*op & 0xC0) == 0x80;
248 }
249 
250 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
251 				struct icmp6hdr *thdr, int len)
252 {
253 	struct sk_buff *skb;
254 	struct icmp6hdr *icmp6h;
255 
256 	skb = skb_peek(&sk->sk_write_queue);
257 	if (!skb)
258 		return;
259 
260 	icmp6h = icmp6_hdr(skb);
261 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
262 	icmp6h->icmp6_cksum = 0;
263 
264 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
265 		skb->csum = csum_partial(icmp6h,
266 					sizeof(struct icmp6hdr), skb->csum);
267 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
268 						      &fl6->daddr,
269 						      len, fl6->flowi6_proto,
270 						      skb->csum);
271 	} else {
272 		__wsum tmp_csum = 0;
273 
274 		skb_queue_walk(&sk->sk_write_queue, skb) {
275 			tmp_csum = csum_add(tmp_csum, skb->csum);
276 		}
277 
278 		tmp_csum = csum_partial(icmp6h,
279 					sizeof(struct icmp6hdr), tmp_csum);
280 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
281 						      &fl6->daddr,
282 						      len, fl6->flowi6_proto,
283 						      tmp_csum);
284 	}
285 	ip6_push_pending_frames(sk);
286 }
287 
288 struct icmpv6_msg {
289 	struct sk_buff	*skb;
290 	int		offset;
291 	uint8_t		type;
292 };
293 
294 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
295 {
296 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
297 	struct sk_buff *org_skb = msg->skb;
298 	__wsum csum = 0;
299 
300 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
301 				      to, len, csum);
302 	skb->csum = csum_block_add(skb->csum, csum, odd);
303 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
304 		nf_ct_attach(skb, org_skb);
305 	return 0;
306 }
307 
308 #if IS_ENABLED(CONFIG_IPV6_MIP6)
309 static void mip6_addr_swap(struct sk_buff *skb)
310 {
311 	struct ipv6hdr *iph = ipv6_hdr(skb);
312 	struct inet6_skb_parm *opt = IP6CB(skb);
313 	struct ipv6_destopt_hao *hao;
314 	struct in6_addr tmp;
315 	int off;
316 
317 	if (opt->dsthao) {
318 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
319 		if (likely(off >= 0)) {
320 			hao = (struct ipv6_destopt_hao *)
321 					(skb_network_header(skb) + off);
322 			tmp = iph->saddr;
323 			iph->saddr = hao->addr;
324 			hao->addr = tmp;
325 		}
326 	}
327 }
328 #else
329 static inline void mip6_addr_swap(struct sk_buff *skb) {}
330 #endif
331 
332 static struct dst_entry *icmpv6_route_lookup(struct net *net,
333 					     struct sk_buff *skb,
334 					     struct sock *sk,
335 					     struct flowi6 *fl6)
336 {
337 	struct dst_entry *dst, *dst2;
338 	struct flowi6 fl2;
339 	int err;
340 
341 	err = ip6_dst_lookup(net, sk, &dst, fl6);
342 	if (err)
343 		return ERR_PTR(err);
344 
345 	/*
346 	 * We won't send icmp if the destination is known
347 	 * anycast.
348 	 */
349 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
350 		net_dbg_ratelimited("icmp6_send: acast source\n");
351 		dst_release(dst);
352 		return ERR_PTR(-EINVAL);
353 	}
354 
355 	/* No need to clone since we're just using its address. */
356 	dst2 = dst;
357 
358 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
359 	if (!IS_ERR(dst)) {
360 		if (dst != dst2)
361 			return dst;
362 	} else {
363 		if (PTR_ERR(dst) == -EPERM)
364 			dst = NULL;
365 		else
366 			return dst;
367 	}
368 
369 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
370 	if (err)
371 		goto relookup_failed;
372 
373 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
374 	if (err)
375 		goto relookup_failed;
376 
377 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
378 	if (!IS_ERR(dst2)) {
379 		dst_release(dst);
380 		dst = dst2;
381 	} else {
382 		err = PTR_ERR(dst2);
383 		if (err == -EPERM) {
384 			dst_release(dst);
385 			return dst2;
386 		} else
387 			goto relookup_failed;
388 	}
389 
390 relookup_failed:
391 	if (dst)
392 		return dst;
393 	return ERR_PTR(err);
394 }
395 
396 static struct net_device *icmp6_dev(const struct sk_buff *skb)
397 {
398 	struct net_device *dev = skb->dev;
399 
400 	/* for local traffic to local address, skb dev is the loopback
401 	 * device. Check if there is a dst attached to the skb and if so
402 	 * get the real device index. Same is needed for replies to a link
403 	 * local address on a device enslaved to an L3 master device
404 	 */
405 	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
406 		const struct rt6_info *rt6 = skb_rt6_info(skb);
407 
408 		if (rt6)
409 			dev = rt6->rt6i_idev->dev;
410 	}
411 
412 	return dev;
413 }
414 
415 static int icmp6_iif(const struct sk_buff *skb)
416 {
417 	return icmp6_dev(skb)->ifindex;
418 }
419 
420 /*
421  *	Send an ICMP message in response to a packet in error
422  */
423 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
424 		       const struct in6_addr *force_saddr)
425 {
426 	struct inet6_dev *idev = NULL;
427 	struct ipv6hdr *hdr = ipv6_hdr(skb);
428 	struct sock *sk;
429 	struct net *net;
430 	struct ipv6_pinfo *np;
431 	const struct in6_addr *saddr = NULL;
432 	struct dst_entry *dst;
433 	struct icmp6hdr tmp_hdr;
434 	struct flowi6 fl6;
435 	struct icmpv6_msg msg;
436 	struct ipcm6_cookie ipc6;
437 	int iif = 0;
438 	int addr_type = 0;
439 	int len;
440 	u32 mark;
441 
442 	if ((u8 *)hdr < skb->head ||
443 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
444 		return;
445 
446 	if (!skb->dev)
447 		return;
448 	net = dev_net(skb->dev);
449 	mark = IP6_REPLY_MARK(net, skb->mark);
450 	/*
451 	 *	Make sure we respect the rules
452 	 *	i.e. RFC 1885 2.4(e)
453 	 *	Rule (e.1) is enforced by not using icmp6_send
454 	 *	in any code that processes icmp errors.
455 	 */
456 	addr_type = ipv6_addr_type(&hdr->daddr);
457 
458 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
459 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
460 		saddr = &hdr->daddr;
461 
462 	/*
463 	 *	Dest addr check
464 	 */
465 
466 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
467 		if (type != ICMPV6_PKT_TOOBIG &&
468 		    !(type == ICMPV6_PARAMPROB &&
469 		      code == ICMPV6_UNK_OPTION &&
470 		      (opt_unrec(skb, info))))
471 			return;
472 
473 		saddr = NULL;
474 	}
475 
476 	addr_type = ipv6_addr_type(&hdr->saddr);
477 
478 	/*
479 	 *	Source addr check
480 	 */
481 
482 	if (__ipv6_addr_needs_scope_id(addr_type)) {
483 		iif = icmp6_iif(skb);
484 	} else {
485 		dst = skb_dst(skb);
486 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
487 	}
488 
489 	/*
490 	 *	Must not send error if the source does not uniquely
491 	 *	identify a single node (RFC2463 Section 2.4).
492 	 *	We check unspecified / multicast addresses here,
493 	 *	and anycast addresses will be checked later.
494 	 */
495 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
496 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
497 				    &hdr->saddr, &hdr->daddr);
498 		return;
499 	}
500 
501 	/*
502 	 *	Never answer to a ICMP packet.
503 	 */
504 	if (is_ineligible(skb)) {
505 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
506 				    &hdr->saddr, &hdr->daddr);
507 		return;
508 	}
509 
510 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
511 	local_bh_disable();
512 
513 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
514 	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
515 		goto out_bh_enable;
516 
517 	mip6_addr_swap(skb);
518 
519 	sk = icmpv6_xmit_lock(net);
520 	if (!sk)
521 		goto out_bh_enable;
522 
523 	memset(&fl6, 0, sizeof(fl6));
524 	fl6.flowi6_proto = IPPROTO_ICMPV6;
525 	fl6.daddr = hdr->saddr;
526 	if (force_saddr)
527 		saddr = force_saddr;
528 	if (saddr) {
529 		fl6.saddr = *saddr;
530 	} else {
531 		/* select a more meaningful saddr from input if */
532 		struct net_device *in_netdev;
533 
534 		in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
535 		if (in_netdev) {
536 			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
537 					   inet6_sk(sk)->srcprefs,
538 					   &fl6.saddr);
539 			dev_put(in_netdev);
540 		}
541 	}
542 	fl6.flowi6_mark = mark;
543 	fl6.flowi6_oif = iif;
544 	fl6.fl6_icmp_type = type;
545 	fl6.fl6_icmp_code = code;
546 	fl6.flowi6_uid = sock_net_uid(net, NULL);
547 	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
548 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
549 
550 	sk->sk_mark = mark;
551 	np = inet6_sk(sk);
552 
553 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
554 		goto out;
555 
556 	tmp_hdr.icmp6_type = type;
557 	tmp_hdr.icmp6_code = code;
558 	tmp_hdr.icmp6_cksum = 0;
559 	tmp_hdr.icmp6_pointer = htonl(info);
560 
561 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
562 		fl6.flowi6_oif = np->mcast_oif;
563 	else if (!fl6.flowi6_oif)
564 		fl6.flowi6_oif = np->ucast_oif;
565 
566 	ipcm6_init_sk(&ipc6, np);
567 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
568 
569 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
570 	if (IS_ERR(dst))
571 		goto out;
572 
573 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
574 
575 	msg.skb = skb;
576 	msg.offset = skb_network_offset(skb);
577 	msg.type = type;
578 
579 	len = skb->len - msg.offset;
580 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
581 	if (len < 0) {
582 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
583 				    &hdr->saddr, &hdr->daddr);
584 		goto out_dst_release;
585 	}
586 
587 	rcu_read_lock();
588 	idev = __in6_dev_get(skb->dev);
589 
590 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
591 			    len + sizeof(struct icmp6hdr),
592 			    sizeof(struct icmp6hdr),
593 			    &ipc6, &fl6, (struct rt6_info *)dst,
594 			    MSG_DONTWAIT)) {
595 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
596 		ip6_flush_pending_frames(sk);
597 	} else {
598 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
599 					   len + sizeof(struct icmp6hdr));
600 	}
601 	rcu_read_unlock();
602 out_dst_release:
603 	dst_release(dst);
604 out:
605 	icmpv6_xmit_unlock(sk);
606 out_bh_enable:
607 	local_bh_enable();
608 }
609 
610 /* Slightly more convenient version of icmp6_send.
611  */
612 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
613 {
614 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
615 	kfree_skb(skb);
616 }
617 
618 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
619  * if sufficient data bytes are available
620  * @nhs is the size of the tunnel header(s) :
621  *  Either an IPv4 header for SIT encap
622  *         an IPv4 header + GRE header for GRE encap
623  */
624 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
625 			       unsigned int data_len)
626 {
627 	struct in6_addr temp_saddr;
628 	struct rt6_info *rt;
629 	struct sk_buff *skb2;
630 	u32 info = 0;
631 
632 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
633 		return 1;
634 
635 	/* RFC 4884 (partial) support for ICMP extensions */
636 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
637 		data_len = 0;
638 
639 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
640 
641 	if (!skb2)
642 		return 1;
643 
644 	skb_dst_drop(skb2);
645 	skb_pull(skb2, nhs);
646 	skb_reset_network_header(skb2);
647 
648 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
649 			skb, 0);
650 
651 	if (rt && rt->dst.dev)
652 		skb2->dev = rt->dst.dev;
653 
654 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
655 
656 	if (data_len) {
657 		/* RFC 4884 (partial) support :
658 		 * insert 0 padding at the end, before the extensions
659 		 */
660 		__skb_push(skb2, nhs);
661 		skb_reset_network_header(skb2);
662 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
663 		memset(skb2->data + data_len - nhs, 0, nhs);
664 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
665 		 * and stored in reserved[0]
666 		 */
667 		info = (data_len/8) << 24;
668 	}
669 	if (type == ICMP_TIME_EXCEEDED)
670 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
671 			   info, &temp_saddr);
672 	else
673 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
674 			   info, &temp_saddr);
675 	if (rt)
676 		ip6_rt_put(rt);
677 
678 	kfree_skb(skb2);
679 
680 	return 0;
681 }
682 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
683 
684 static void icmpv6_echo_reply(struct sk_buff *skb)
685 {
686 	struct net *net = dev_net(skb->dev);
687 	struct sock *sk;
688 	struct inet6_dev *idev;
689 	struct ipv6_pinfo *np;
690 	const struct in6_addr *saddr = NULL;
691 	struct icmp6hdr *icmph = icmp6_hdr(skb);
692 	struct icmp6hdr tmp_hdr;
693 	struct flowi6 fl6;
694 	struct icmpv6_msg msg;
695 	struct dst_entry *dst;
696 	struct ipcm6_cookie ipc6;
697 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
698 	bool acast;
699 
700 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
701 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
702 		return;
703 
704 	saddr = &ipv6_hdr(skb)->daddr;
705 
706 	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
707 	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
708 		return;
709 
710 	if (!ipv6_unicast_destination(skb) &&
711 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
712 		saddr = NULL;
713 
714 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
715 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
716 
717 	memset(&fl6, 0, sizeof(fl6));
718 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
719 		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
720 
721 	fl6.flowi6_proto = IPPROTO_ICMPV6;
722 	fl6.daddr = ipv6_hdr(skb)->saddr;
723 	if (saddr)
724 		fl6.saddr = *saddr;
725 	fl6.flowi6_oif = icmp6_iif(skb);
726 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
727 	fl6.flowi6_mark = mark;
728 	fl6.flowi6_uid = sock_net_uid(net, NULL);
729 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
730 
731 	local_bh_disable();
732 	sk = icmpv6_xmit_lock(net);
733 	if (!sk)
734 		goto out_bh_enable;
735 	sk->sk_mark = mark;
736 	np = inet6_sk(sk);
737 
738 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
739 		fl6.flowi6_oif = np->mcast_oif;
740 	else if (!fl6.flowi6_oif)
741 		fl6.flowi6_oif = np->ucast_oif;
742 
743 	if (ip6_dst_lookup(net, sk, &dst, &fl6))
744 		goto out;
745 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
746 	if (IS_ERR(dst))
747 		goto out;
748 
749 	/* Check the ratelimit */
750 	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
751 	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
752 		goto out_dst_release;
753 
754 	idev = __in6_dev_get(skb->dev);
755 
756 	msg.skb = skb;
757 	msg.offset = 0;
758 	msg.type = ICMPV6_ECHO_REPLY;
759 
760 	ipcm6_init_sk(&ipc6, np);
761 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
762 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
763 
764 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
765 			    skb->len + sizeof(struct icmp6hdr),
766 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
767 			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
768 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
769 		ip6_flush_pending_frames(sk);
770 	} else {
771 		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
772 					   skb->len + sizeof(struct icmp6hdr));
773 	}
774 out_dst_release:
775 	dst_release(dst);
776 out:
777 	icmpv6_xmit_unlock(sk);
778 out_bh_enable:
779 	local_bh_enable();
780 }
781 
782 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
783 {
784 	const struct inet6_protocol *ipprot;
785 	int inner_offset;
786 	__be16 frag_off;
787 	u8 nexthdr;
788 	struct net *net = dev_net(skb->dev);
789 
790 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
791 		goto out;
792 
793 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
794 	if (ipv6_ext_hdr(nexthdr)) {
795 		/* now skip over extension headers */
796 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
797 						&nexthdr, &frag_off);
798 		if (inner_offset < 0)
799 			goto out;
800 	} else {
801 		inner_offset = sizeof(struct ipv6hdr);
802 	}
803 
804 	/* Checkin header including 8 bytes of inner protocol header. */
805 	if (!pskb_may_pull(skb, inner_offset+8))
806 		goto out;
807 
808 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
809 	   Without this we will not able f.e. to make source routed
810 	   pmtu discovery.
811 	   Corresponding argument (opt) to notifiers is already added.
812 	   --ANK (980726)
813 	 */
814 
815 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
816 	if (ipprot && ipprot->err_handler)
817 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
818 
819 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
820 	return;
821 
822 out:
823 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
824 }
825 
826 /*
827  *	Handle icmp messages
828  */
829 
830 static int icmpv6_rcv(struct sk_buff *skb)
831 {
832 	struct net *net = dev_net(skb->dev);
833 	struct net_device *dev = icmp6_dev(skb);
834 	struct inet6_dev *idev = __in6_dev_get(dev);
835 	const struct in6_addr *saddr, *daddr;
836 	struct icmp6hdr *hdr;
837 	u8 type;
838 	bool success = false;
839 
840 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
841 		struct sec_path *sp = skb_sec_path(skb);
842 		int nh;
843 
844 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
845 				 XFRM_STATE_ICMP))
846 			goto drop_no_count;
847 
848 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
849 			goto drop_no_count;
850 
851 		nh = skb_network_offset(skb);
852 		skb_set_network_header(skb, sizeof(*hdr));
853 
854 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
855 			goto drop_no_count;
856 
857 		skb_set_network_header(skb, nh);
858 	}
859 
860 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
861 
862 	saddr = &ipv6_hdr(skb)->saddr;
863 	daddr = &ipv6_hdr(skb)->daddr;
864 
865 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
866 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
867 				    saddr, daddr);
868 		goto csum_error;
869 	}
870 
871 	if (!pskb_pull(skb, sizeof(*hdr)))
872 		goto discard_it;
873 
874 	hdr = icmp6_hdr(skb);
875 
876 	type = hdr->icmp6_type;
877 
878 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
879 
880 	switch (type) {
881 	case ICMPV6_ECHO_REQUEST:
882 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
883 			icmpv6_echo_reply(skb);
884 		break;
885 
886 	case ICMPV6_ECHO_REPLY:
887 		success = ping_rcv(skb);
888 		break;
889 
890 	case ICMPV6_PKT_TOOBIG:
891 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
892 		   standard destination cache. Seems, only "advanced"
893 		   destination cache will allow to solve this problem
894 		   --ANK (980726)
895 		 */
896 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
897 			goto discard_it;
898 		hdr = icmp6_hdr(skb);
899 
900 		/* to notify */
901 		/* fall through */
902 	case ICMPV6_DEST_UNREACH:
903 	case ICMPV6_TIME_EXCEED:
904 	case ICMPV6_PARAMPROB:
905 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
906 		break;
907 
908 	case NDISC_ROUTER_SOLICITATION:
909 	case NDISC_ROUTER_ADVERTISEMENT:
910 	case NDISC_NEIGHBOUR_SOLICITATION:
911 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
912 	case NDISC_REDIRECT:
913 		ndisc_rcv(skb);
914 		break;
915 
916 	case ICMPV6_MGM_QUERY:
917 		igmp6_event_query(skb);
918 		break;
919 
920 	case ICMPV6_MGM_REPORT:
921 		igmp6_event_report(skb);
922 		break;
923 
924 	case ICMPV6_MGM_REDUCTION:
925 	case ICMPV6_NI_QUERY:
926 	case ICMPV6_NI_REPLY:
927 	case ICMPV6_MLD2_REPORT:
928 	case ICMPV6_DHAAD_REQUEST:
929 	case ICMPV6_DHAAD_REPLY:
930 	case ICMPV6_MOBILE_PREFIX_SOL:
931 	case ICMPV6_MOBILE_PREFIX_ADV:
932 		break;
933 
934 	default:
935 		/* informational */
936 		if (type & ICMPV6_INFOMSG_MASK)
937 			break;
938 
939 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
940 				    saddr, daddr);
941 
942 		/*
943 		 * error of unknown type.
944 		 * must pass to upper level
945 		 */
946 
947 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
948 	}
949 
950 	/* until the v6 path can be better sorted assume failure and
951 	 * preserve the status quo behaviour for the rest of the paths to here
952 	 */
953 	if (success)
954 		consume_skb(skb);
955 	else
956 		kfree_skb(skb);
957 
958 	return 0;
959 
960 csum_error:
961 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
962 discard_it:
963 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
964 drop_no_count:
965 	kfree_skb(skb);
966 	return 0;
967 }
968 
969 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
970 		      u8 type,
971 		      const struct in6_addr *saddr,
972 		      const struct in6_addr *daddr,
973 		      int oif)
974 {
975 	memset(fl6, 0, sizeof(*fl6));
976 	fl6->saddr = *saddr;
977 	fl6->daddr = *daddr;
978 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
979 	fl6->fl6_icmp_type	= type;
980 	fl6->fl6_icmp_code	= 0;
981 	fl6->flowi6_oif		= oif;
982 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
983 }
984 
985 static void __net_exit icmpv6_sk_exit(struct net *net)
986 {
987 	int i;
988 
989 	for_each_possible_cpu(i)
990 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
991 	free_percpu(net->ipv6.icmp_sk);
992 }
993 
994 static int __net_init icmpv6_sk_init(struct net *net)
995 {
996 	struct sock *sk;
997 	int err, i;
998 
999 	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
1000 	if (!net->ipv6.icmp_sk)
1001 		return -ENOMEM;
1002 
1003 	for_each_possible_cpu(i) {
1004 		err = inet_ctl_sock_create(&sk, PF_INET6,
1005 					   SOCK_RAW, IPPROTO_ICMPV6, net);
1006 		if (err < 0) {
1007 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1008 			       err);
1009 			goto fail;
1010 		}
1011 
1012 		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
1013 
1014 		/* Enough space for 2 64K ICMP packets, including
1015 		 * sk_buff struct overhead.
1016 		 */
1017 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1018 	}
1019 	return 0;
1020 
1021  fail:
1022 	icmpv6_sk_exit(net);
1023 	return err;
1024 }
1025 
1026 static struct pernet_operations icmpv6_sk_ops = {
1027 	.init = icmpv6_sk_init,
1028 	.exit = icmpv6_sk_exit,
1029 };
1030 
1031 int __init icmpv6_init(void)
1032 {
1033 	int err;
1034 
1035 	err = register_pernet_subsys(&icmpv6_sk_ops);
1036 	if (err < 0)
1037 		return err;
1038 
1039 	err = -EAGAIN;
1040 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1041 		goto fail;
1042 
1043 	err = inet6_register_icmp_sender(icmp6_send);
1044 	if (err)
1045 		goto sender_reg_err;
1046 	return 0;
1047 
1048 sender_reg_err:
1049 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1050 fail:
1051 	pr_err("Failed to register ICMP6 protocol\n");
1052 	unregister_pernet_subsys(&icmpv6_sk_ops);
1053 	return err;
1054 }
1055 
1056 void icmpv6_cleanup(void)
1057 {
1058 	inet6_unregister_icmp_sender(icmp6_send);
1059 	unregister_pernet_subsys(&icmpv6_sk_ops);
1060 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1061 }
1062 
1063 
/*
 * errno and "fatal" flag for each Destination Unreachable code,
 * indexed by the ICMPv6 code value.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,  .fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,       .fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH, .fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH, .fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED, .fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,       .fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,       .fatal = 1 },	/* REJECT_ROUTE */
};
1097 
1098 int icmpv6_err_convert(u8 type, u8 code, int *err)
1099 {
1100 	int fatal = 0;
1101 
1102 	*err = EPROTO;
1103 
1104 	switch (type) {
1105 	case ICMPV6_DEST_UNREACH:
1106 		fatal = 1;
1107 		if (code < ARRAY_SIZE(tab_unreach)) {
1108 			*err  = tab_unreach[code].err;
1109 			fatal = tab_unreach[code].fatal;
1110 		}
1111 		break;
1112 
1113 	case ICMPV6_PKT_TOOBIG:
1114 		*err = EMSGSIZE;
1115 		break;
1116 
1117 	case ICMPV6_PARAMPROB:
1118 		*err = EPROTO;
1119 		fatal = 1;
1120 		break;
1121 
1122 	case ICMPV6_TIME_EXCEED:
1123 		*err = EHOSTUNREACH;
1124 		break;
1125 	}
1126 
1127 	return fatal;
1128 }
1129 EXPORT_SYMBOL(icmpv6_err_convert);
1130 
1131 #ifdef CONFIG_SYSCTL
1132 static struct ctl_table ipv6_icmp_table_template[] = {
1133 	{
1134 		.procname	= "ratelimit",
1135 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1136 		.maxlen		= sizeof(int),
1137 		.mode		= 0644,
1138 		.proc_handler	= proc_dointvec_ms_jiffies,
1139 	},
1140 	{
1141 		.procname	= "echo_ignore_all",
1142 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1143 		.maxlen		= sizeof(int),
1144 		.mode		= 0644,
1145 		.proc_handler = proc_dointvec,
1146 	},
1147 	{
1148 		.procname	= "echo_ignore_multicast",
1149 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1150 		.maxlen		= sizeof(int),
1151 		.mode		= 0644,
1152 		.proc_handler = proc_dointvec,
1153 	},
1154 	{
1155 		.procname	= "echo_ignore_anycast",
1156 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1157 		.maxlen		= sizeof(int),
1158 		.mode		= 0644,
1159 		.proc_handler = proc_dointvec,
1160 	},
1161 	{
1162 		.procname	= "ratemask",
1163 		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1164 		.maxlen		= ICMPV6_MSG_MAX + 1,
1165 		.mode		= 0644,
1166 		.proc_handler = proc_do_large_bitmap,
1167 	},
1168 	{ },
1169 };
1170 
1171 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1172 {
1173 	struct ctl_table *table;
1174 
1175 	table = kmemdup(ipv6_icmp_table_template,
1176 			sizeof(ipv6_icmp_table_template),
1177 			GFP_KERNEL);
1178 
1179 	if (table) {
1180 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1181 		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1182 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1183 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1184 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1185 	}
1186 	return table;
1187 }
1188 #endif
1189