xref: /openbmc/linux/net/ipv6/icmp.c (revision f7d84fa7)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 }
104 
105 static int icmpv6_rcv(struct sk_buff *skb);
106 
107 static const struct inet6_protocol icmpv6_protocol = {
108 	.handler	=	icmpv6_rcv,
109 	.err_handler	=	icmpv6_err,
110 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
111 };
112 
113 /* Called with BH disabled */
114 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
115 {
116 	struct sock *sk;
117 
118 	sk = icmpv6_sk(net);
119 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 		/* This can happen if the output path (f.e. SIT or
121 		 * ip6ip6 tunnel) signals dst_link_failure() for an
122 		 * outgoing ICMP6 packet.
123 		 */
124 		return NULL;
125 	}
126 	return sk;
127 }
128 
129 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
130 {
131 	spin_unlock(&sk->sk_lock.slock);
132 }
133 
134 /*
135  * Figure out, may we reply to this packet with icmp error.
136  *
137  * We do not reply, if:
138  *	- it was icmp error message.
139  *	- it is truncated, so that it is known, that protocol is ICMPV6
140  *	  (i.e. in the middle of some exthdr)
141  *
142  *	--ANK (980726)
143  */
144 
145 static bool is_ineligible(const struct sk_buff *skb)
146 {
147 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
148 	int len = skb->len - ptr;
149 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
150 	__be16 frag_off;
151 
152 	if (len < 0)
153 		return true;
154 
155 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
156 	if (ptr < 0)
157 		return false;
158 	if (nexthdr == IPPROTO_ICMPV6) {
159 		u8 _type, *tp;
160 		tp = skb_header_pointer(skb,
161 			ptr+offsetof(struct icmp6hdr, icmp6_type),
162 			sizeof(_type), &_type);
163 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
164 			return true;
165 	}
166 	return false;
167 }
168 
169 static bool icmpv6_mask_allow(int type)
170 {
171 	/* Informational messages are not limited. */
172 	if (type & ICMPV6_INFOMSG_MASK)
173 		return true;
174 
175 	/* Do not limit pmtu discovery, it would break it. */
176 	if (type == ICMPV6_PKT_TOOBIG)
177 		return true;
178 
179 	return false;
180 }
181 
182 static bool icmpv6_global_allow(int type)
183 {
184 	if (icmpv6_mask_allow(type))
185 		return true;
186 
187 	if (icmp_global_allow())
188 		return true;
189 
190 	return false;
191 }
192 
193 /*
194  * Check the ICMP output rate limit
195  */
196 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
197 			       struct flowi6 *fl6)
198 {
199 	struct net *net = sock_net(sk);
200 	struct dst_entry *dst;
201 	bool res = false;
202 
203 	if (icmpv6_mask_allow(type))
204 		return true;
205 
206 	/*
207 	 * Look up the output route.
208 	 * XXX: perhaps the expire for routing entries cloned by
209 	 * this lookup should be more aggressive (not longer than timeout).
210 	 */
211 	dst = ip6_route_output(net, sk, fl6);
212 	if (dst->error) {
213 		IP6_INC_STATS(net, ip6_dst_idev(dst),
214 			      IPSTATS_MIB_OUTNOROUTES);
215 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
216 		res = true;
217 	} else {
218 		struct rt6_info *rt = (struct rt6_info *)dst;
219 		int tmo = net->ipv6.sysctl.icmpv6_time;
220 		struct inet_peer *peer;
221 
222 		/* Give more bandwidth to wider prefixes. */
223 		if (rt->rt6i_dst.plen < 128)
224 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
225 
226 		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
227 		res = inet_peer_xrlim_allow(peer, tmo);
228 		if (peer)
229 			inet_putpeer(peer);
230 	}
231 	dst_release(dst);
232 	return res;
233 }
234 
235 /*
236  *	an inline helper for the "simple" if statement below
237  *	checks if parameter problem report is caused by an
238  *	unrecognized IPv6 option that has the Option Type
239  *	highest-order two bits set to 10
240  */
241 
242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
243 {
244 	u8 _optval, *op;
245 
246 	offset += skb_network_offset(skb);
247 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
248 	if (!op)
249 		return true;
250 	return (*op & 0xC0) == 0x80;
251 }
252 
253 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
254 			       struct icmp6hdr *thdr, int len)
255 {
256 	struct sk_buff *skb;
257 	struct icmp6hdr *icmp6h;
258 	int err = 0;
259 
260 	skb = skb_peek(&sk->sk_write_queue);
261 	if (!skb)
262 		goto out;
263 
264 	icmp6h = icmp6_hdr(skb);
265 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
266 	icmp6h->icmp6_cksum = 0;
267 
268 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
269 		skb->csum = csum_partial(icmp6h,
270 					sizeof(struct icmp6hdr), skb->csum);
271 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
272 						      &fl6->daddr,
273 						      len, fl6->flowi6_proto,
274 						      skb->csum);
275 	} else {
276 		__wsum tmp_csum = 0;
277 
278 		skb_queue_walk(&sk->sk_write_queue, skb) {
279 			tmp_csum = csum_add(tmp_csum, skb->csum);
280 		}
281 
282 		tmp_csum = csum_partial(icmp6h,
283 					sizeof(struct icmp6hdr), tmp_csum);
284 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
285 						      &fl6->daddr,
286 						      len, fl6->flowi6_proto,
287 						      tmp_csum);
288 	}
289 	ip6_push_pending_frames(sk);
290 out:
291 	return err;
292 }
293 
294 struct icmpv6_msg {
295 	struct sk_buff	*skb;
296 	int		offset;
297 	uint8_t		type;
298 };
299 
300 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
301 {
302 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
303 	struct sk_buff *org_skb = msg->skb;
304 	__wsum csum = 0;
305 
306 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
307 				      to, len, csum);
308 	skb->csum = csum_block_add(skb->csum, csum, odd);
309 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
310 		nf_ct_attach(skb, org_skb);
311 	return 0;
312 }
313 
314 #if IS_ENABLED(CONFIG_IPV6_MIP6)
315 static void mip6_addr_swap(struct sk_buff *skb)
316 {
317 	struct ipv6hdr *iph = ipv6_hdr(skb);
318 	struct inet6_skb_parm *opt = IP6CB(skb);
319 	struct ipv6_destopt_hao *hao;
320 	struct in6_addr tmp;
321 	int off;
322 
323 	if (opt->dsthao) {
324 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
325 		if (likely(off >= 0)) {
326 			hao = (struct ipv6_destopt_hao *)
327 					(skb_network_header(skb) + off);
328 			tmp = iph->saddr;
329 			iph->saddr = hao->addr;
330 			hao->addr = tmp;
331 		}
332 	}
333 }
334 #else
335 static inline void mip6_addr_swap(struct sk_buff *skb) {}
336 #endif
337 
338 static struct dst_entry *icmpv6_route_lookup(struct net *net,
339 					     struct sk_buff *skb,
340 					     struct sock *sk,
341 					     struct flowi6 *fl6)
342 {
343 	struct dst_entry *dst, *dst2;
344 	struct flowi6 fl2;
345 	int err;
346 
347 	err = ip6_dst_lookup(net, sk, &dst, fl6);
348 	if (err)
349 		return ERR_PTR(err);
350 
351 	/*
352 	 * We won't send icmp if the destination is known
353 	 * anycast.
354 	 */
355 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
356 		net_dbg_ratelimited("icmp6_send: acast source\n");
357 		dst_release(dst);
358 		return ERR_PTR(-EINVAL);
359 	}
360 
361 	/* No need to clone since we're just using its address. */
362 	dst2 = dst;
363 
364 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
365 	if (!IS_ERR(dst)) {
366 		if (dst != dst2)
367 			return dst;
368 	} else {
369 		if (PTR_ERR(dst) == -EPERM)
370 			dst = NULL;
371 		else
372 			return dst;
373 	}
374 
375 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
376 	if (err)
377 		goto relookup_failed;
378 
379 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
380 	if (err)
381 		goto relookup_failed;
382 
383 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
384 	if (!IS_ERR(dst2)) {
385 		dst_release(dst);
386 		dst = dst2;
387 	} else {
388 		err = PTR_ERR(dst2);
389 		if (err == -EPERM) {
390 			dst_release(dst);
391 			return dst2;
392 		} else
393 			goto relookup_failed;
394 	}
395 
396 relookup_failed:
397 	if (dst)
398 		return dst;
399 	return ERR_PTR(err);
400 }
401 
402 /*
403  *	Send an ICMP message in response to a packet in error
404  */
405 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
406 		       const struct in6_addr *force_saddr)
407 {
408 	struct net *net = dev_net(skb->dev);
409 	struct inet6_dev *idev = NULL;
410 	struct ipv6hdr *hdr = ipv6_hdr(skb);
411 	struct sock *sk;
412 	struct ipv6_pinfo *np;
413 	const struct in6_addr *saddr = NULL;
414 	struct dst_entry *dst;
415 	struct icmp6hdr tmp_hdr;
416 	struct flowi6 fl6;
417 	struct icmpv6_msg msg;
418 	struct sockcm_cookie sockc_unused = {0};
419 	struct ipcm6_cookie ipc6;
420 	int iif = 0;
421 	int addr_type = 0;
422 	int len;
423 	int err = 0;
424 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
425 
426 	if ((u8 *)hdr < skb->head ||
427 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
428 		return;
429 
430 	/*
431 	 *	Make sure we respect the rules
432 	 *	i.e. RFC 1885 2.4(e)
433 	 *	Rule (e.1) is enforced by not using icmp6_send
434 	 *	in any code that processes icmp errors.
435 	 */
436 	addr_type = ipv6_addr_type(&hdr->daddr);
437 
438 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
439 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
440 		saddr = &hdr->daddr;
441 
442 	/*
443 	 *	Dest addr check
444 	 */
445 
446 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
447 		if (type != ICMPV6_PKT_TOOBIG &&
448 		    !(type == ICMPV6_PARAMPROB &&
449 		      code == ICMPV6_UNK_OPTION &&
450 		      (opt_unrec(skb, info))))
451 			return;
452 
453 		saddr = NULL;
454 	}
455 
456 	addr_type = ipv6_addr_type(&hdr->saddr);
457 
458 	/*
459 	 *	Source addr check
460 	 */
461 
462 	if (__ipv6_addr_needs_scope_id(addr_type))
463 		iif = skb->dev->ifindex;
464 	else {
465 		dst = skb_dst(skb);
466 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
467 	}
468 
469 	/*
470 	 *	Must not send error if the source does not uniquely
471 	 *	identify a single node (RFC2463 Section 2.4).
472 	 *	We check unspecified / multicast addresses here,
473 	 *	and anycast addresses will be checked later.
474 	 */
475 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
476 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
477 				    &hdr->saddr, &hdr->daddr);
478 		return;
479 	}
480 
481 	/*
482 	 *	Never answer to a ICMP packet.
483 	 */
484 	if (is_ineligible(skb)) {
485 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
486 				    &hdr->saddr, &hdr->daddr);
487 		return;
488 	}
489 
490 	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
491 	local_bh_disable();
492 
493 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
494 	if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
495 		goto out_bh_enable;
496 
497 	mip6_addr_swap(skb);
498 
499 	memset(&fl6, 0, sizeof(fl6));
500 	fl6.flowi6_proto = IPPROTO_ICMPV6;
501 	fl6.daddr = hdr->saddr;
502 	if (force_saddr)
503 		saddr = force_saddr;
504 	if (saddr)
505 		fl6.saddr = *saddr;
506 	fl6.flowi6_mark = mark;
507 	fl6.flowi6_oif = iif;
508 	fl6.fl6_icmp_type = type;
509 	fl6.fl6_icmp_code = code;
510 	fl6.flowi6_uid = sock_net_uid(net, NULL);
511 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
512 
513 	sk = icmpv6_xmit_lock(net);
514 	if (!sk)
515 		goto out_bh_enable;
516 
517 	sk->sk_mark = mark;
518 	np = inet6_sk(sk);
519 
520 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
521 		goto out;
522 
523 	tmp_hdr.icmp6_type = type;
524 	tmp_hdr.icmp6_code = code;
525 	tmp_hdr.icmp6_cksum = 0;
526 	tmp_hdr.icmp6_pointer = htonl(info);
527 
528 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
529 		fl6.flowi6_oif = np->mcast_oif;
530 	else if (!fl6.flowi6_oif)
531 		fl6.flowi6_oif = np->ucast_oif;
532 
533 	ipc6.tclass = np->tclass;
534 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
535 
536 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
537 	if (IS_ERR(dst))
538 		goto out;
539 
540 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
541 	ipc6.dontfrag = np->dontfrag;
542 	ipc6.opt = NULL;
543 
544 	msg.skb = skb;
545 	msg.offset = skb_network_offset(skb);
546 	msg.type = type;
547 
548 	len = skb->len - msg.offset;
549 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
550 	if (len < 0) {
551 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
552 				    &hdr->saddr, &hdr->daddr);
553 		goto out_dst_release;
554 	}
555 
556 	rcu_read_lock();
557 	idev = __in6_dev_get(skb->dev);
558 
559 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
560 			      len + sizeof(struct icmp6hdr),
561 			      sizeof(struct icmp6hdr),
562 			      &ipc6, &fl6, (struct rt6_info *)dst,
563 			      MSG_DONTWAIT, &sockc_unused);
564 	if (err) {
565 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
566 		ip6_flush_pending_frames(sk);
567 	} else {
568 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
569 						 len + sizeof(struct icmp6hdr));
570 	}
571 	rcu_read_unlock();
572 out_dst_release:
573 	dst_release(dst);
574 out:
575 	icmpv6_xmit_unlock(sk);
576 out_bh_enable:
577 	local_bh_enable();
578 }
579 
580 /* Slightly more convenient version of icmp6_send.
581  */
582 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
583 {
584 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
585 	kfree_skb(skb);
586 }
587 
588 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
589  * if sufficient data bytes are available
590  * @nhs is the size of the tunnel header(s) :
591  *  Either an IPv4 header for SIT encap
592  *         an IPv4 header + GRE header for GRE encap
593  */
594 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
595 			       unsigned int data_len)
596 {
597 	struct in6_addr temp_saddr;
598 	struct rt6_info *rt;
599 	struct sk_buff *skb2;
600 	u32 info = 0;
601 
602 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
603 		return 1;
604 
605 	/* RFC 4884 (partial) support for ICMP extensions */
606 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
607 		data_len = 0;
608 
609 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
610 
611 	if (!skb2)
612 		return 1;
613 
614 	skb_dst_drop(skb2);
615 	skb_pull(skb2, nhs);
616 	skb_reset_network_header(skb2);
617 
618 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
619 
620 	if (rt && rt->dst.dev)
621 		skb2->dev = rt->dst.dev;
622 
623 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
624 
625 	if (data_len) {
626 		/* RFC 4884 (partial) support :
627 		 * insert 0 padding at the end, before the extensions
628 		 */
629 		__skb_push(skb2, nhs);
630 		skb_reset_network_header(skb2);
631 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
632 		memset(skb2->data + data_len - nhs, 0, nhs);
633 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
634 		 * and stored in reserved[0]
635 		 */
636 		info = (data_len/8) << 24;
637 	}
638 	if (type == ICMP_TIME_EXCEEDED)
639 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
640 			   info, &temp_saddr);
641 	else
642 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
643 			   info, &temp_saddr);
644 	if (rt)
645 		ip6_rt_put(rt);
646 
647 	kfree_skb(skb2);
648 
649 	return 0;
650 }
651 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
652 
653 static void icmpv6_echo_reply(struct sk_buff *skb)
654 {
655 	struct net *net = dev_net(skb->dev);
656 	struct sock *sk;
657 	struct inet6_dev *idev;
658 	struct ipv6_pinfo *np;
659 	const struct in6_addr *saddr = NULL;
660 	struct icmp6hdr *icmph = icmp6_hdr(skb);
661 	struct icmp6hdr tmp_hdr;
662 	struct flowi6 fl6;
663 	struct icmpv6_msg msg;
664 	struct dst_entry *dst;
665 	struct ipcm6_cookie ipc6;
666 	int err = 0;
667 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
668 	struct sockcm_cookie sockc_unused = {0};
669 
670 	saddr = &ipv6_hdr(skb)->daddr;
671 
672 	if (!ipv6_unicast_destination(skb) &&
673 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
674 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
675 		saddr = NULL;
676 
677 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
678 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
679 
680 	memset(&fl6, 0, sizeof(fl6));
681 	fl6.flowi6_proto = IPPROTO_ICMPV6;
682 	fl6.daddr = ipv6_hdr(skb)->saddr;
683 	if (saddr)
684 		fl6.saddr = *saddr;
685 	fl6.flowi6_oif = skb->dev->ifindex;
686 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
687 	fl6.flowi6_mark = mark;
688 	fl6.flowi6_uid = sock_net_uid(net, NULL);
689 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
690 
691 	local_bh_disable();
692 	sk = icmpv6_xmit_lock(net);
693 	if (!sk)
694 		goto out_bh_enable;
695 	sk->sk_mark = mark;
696 	np = inet6_sk(sk);
697 
698 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
699 		fl6.flowi6_oif = np->mcast_oif;
700 	else if (!fl6.flowi6_oif)
701 		fl6.flowi6_oif = np->ucast_oif;
702 
703 	err = ip6_dst_lookup(net, sk, &dst, &fl6);
704 	if (err)
705 		goto out;
706 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
707 	if (IS_ERR(dst))
708 		goto out;
709 
710 	idev = __in6_dev_get(skb->dev);
711 
712 	msg.skb = skb;
713 	msg.offset = 0;
714 	msg.type = ICMPV6_ECHO_REPLY;
715 
716 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
717 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
718 	ipc6.dontfrag = np->dontfrag;
719 	ipc6.opt = NULL;
720 
721 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
722 				sizeof(struct icmp6hdr), &ipc6, &fl6,
723 				(struct rt6_info *)dst, MSG_DONTWAIT,
724 				&sockc_unused);
725 
726 	if (err) {
727 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
728 		ip6_flush_pending_frames(sk);
729 	} else {
730 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
731 						 skb->len + sizeof(struct icmp6hdr));
732 	}
733 	dst_release(dst);
734 out:
735 	icmpv6_xmit_unlock(sk);
736 out_bh_enable:
737 	local_bh_enable();
738 }
739 
740 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
741 {
742 	const struct inet6_protocol *ipprot;
743 	int inner_offset;
744 	__be16 frag_off;
745 	u8 nexthdr;
746 	struct net *net = dev_net(skb->dev);
747 
748 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
749 		goto out;
750 
751 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
752 	if (ipv6_ext_hdr(nexthdr)) {
753 		/* now skip over extension headers */
754 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
755 						&nexthdr, &frag_off);
756 		if (inner_offset < 0)
757 			goto out;
758 	} else {
759 		inner_offset = sizeof(struct ipv6hdr);
760 	}
761 
762 	/* Checkin header including 8 bytes of inner protocol header. */
763 	if (!pskb_may_pull(skb, inner_offset+8))
764 		goto out;
765 
766 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
767 	   Without this we will not able f.e. to make source routed
768 	   pmtu discovery.
769 	   Corresponding argument (opt) to notifiers is already added.
770 	   --ANK (980726)
771 	 */
772 
773 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
774 	if (ipprot && ipprot->err_handler)
775 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
776 
777 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
778 	return;
779 
780 out:
781 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
782 }
783 
784 /*
785  *	Handle icmp messages
786  */
787 
788 static int icmpv6_rcv(struct sk_buff *skb)
789 {
790 	struct net_device *dev = skb->dev;
791 	struct inet6_dev *idev = __in6_dev_get(dev);
792 	const struct in6_addr *saddr, *daddr;
793 	struct icmp6hdr *hdr;
794 	u8 type;
795 	bool success = false;
796 
797 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
798 		struct sec_path *sp = skb_sec_path(skb);
799 		int nh;
800 
801 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
802 				 XFRM_STATE_ICMP))
803 			goto drop_no_count;
804 
805 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
806 			goto drop_no_count;
807 
808 		nh = skb_network_offset(skb);
809 		skb_set_network_header(skb, sizeof(*hdr));
810 
811 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
812 			goto drop_no_count;
813 
814 		skb_set_network_header(skb, nh);
815 	}
816 
817 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
818 
819 	saddr = &ipv6_hdr(skb)->saddr;
820 	daddr = &ipv6_hdr(skb)->daddr;
821 
822 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
823 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
824 				    saddr, daddr);
825 		goto csum_error;
826 	}
827 
828 	if (!pskb_pull(skb, sizeof(*hdr)))
829 		goto discard_it;
830 
831 	hdr = icmp6_hdr(skb);
832 
833 	type = hdr->icmp6_type;
834 
835 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
836 
837 	switch (type) {
838 	case ICMPV6_ECHO_REQUEST:
839 		icmpv6_echo_reply(skb);
840 		break;
841 
842 	case ICMPV6_ECHO_REPLY:
843 		success = ping_rcv(skb);
844 		break;
845 
846 	case ICMPV6_PKT_TOOBIG:
847 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
848 		   standard destination cache. Seems, only "advanced"
849 		   destination cache will allow to solve this problem
850 		   --ANK (980726)
851 		 */
852 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
853 			goto discard_it;
854 		hdr = icmp6_hdr(skb);
855 
856 		/*
857 		 *	Drop through to notify
858 		 */
859 
860 	case ICMPV6_DEST_UNREACH:
861 	case ICMPV6_TIME_EXCEED:
862 	case ICMPV6_PARAMPROB:
863 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
864 		break;
865 
866 	case NDISC_ROUTER_SOLICITATION:
867 	case NDISC_ROUTER_ADVERTISEMENT:
868 	case NDISC_NEIGHBOUR_SOLICITATION:
869 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
870 	case NDISC_REDIRECT:
871 		ndisc_rcv(skb);
872 		break;
873 
874 	case ICMPV6_MGM_QUERY:
875 		igmp6_event_query(skb);
876 		break;
877 
878 	case ICMPV6_MGM_REPORT:
879 		igmp6_event_report(skb);
880 		break;
881 
882 	case ICMPV6_MGM_REDUCTION:
883 	case ICMPV6_NI_QUERY:
884 	case ICMPV6_NI_REPLY:
885 	case ICMPV6_MLD2_REPORT:
886 	case ICMPV6_DHAAD_REQUEST:
887 	case ICMPV6_DHAAD_REPLY:
888 	case ICMPV6_MOBILE_PREFIX_SOL:
889 	case ICMPV6_MOBILE_PREFIX_ADV:
890 		break;
891 
892 	default:
893 		/* informational */
894 		if (type & ICMPV6_INFOMSG_MASK)
895 			break;
896 
897 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
898 				    saddr, daddr);
899 
900 		/*
901 		 * error of unknown type.
902 		 * must pass to upper level
903 		 */
904 
905 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
906 	}
907 
908 	/* until the v6 path can be better sorted assume failure and
909 	 * preserve the status quo behaviour for the rest of the paths to here
910 	 */
911 	if (success)
912 		consume_skb(skb);
913 	else
914 		kfree_skb(skb);
915 
916 	return 0;
917 
918 csum_error:
919 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
920 discard_it:
921 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
922 drop_no_count:
923 	kfree_skb(skb);
924 	return 0;
925 }
926 
927 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
928 		      u8 type,
929 		      const struct in6_addr *saddr,
930 		      const struct in6_addr *daddr,
931 		      int oif)
932 {
933 	memset(fl6, 0, sizeof(*fl6));
934 	fl6->saddr = *saddr;
935 	fl6->daddr = *daddr;
936 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
937 	fl6->fl6_icmp_type	= type;
938 	fl6->fl6_icmp_code	= 0;
939 	fl6->flowi6_oif		= oif;
940 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
941 }
942 
943 static int __net_init icmpv6_sk_init(struct net *net)
944 {
945 	struct sock *sk;
946 	int err, i, j;
947 
948 	net->ipv6.icmp_sk =
949 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
950 	if (!net->ipv6.icmp_sk)
951 		return -ENOMEM;
952 
953 	for_each_possible_cpu(i) {
954 		err = inet_ctl_sock_create(&sk, PF_INET6,
955 					   SOCK_RAW, IPPROTO_ICMPV6, net);
956 		if (err < 0) {
957 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
958 			       err);
959 			goto fail;
960 		}
961 
962 		net->ipv6.icmp_sk[i] = sk;
963 
964 		/* Enough space for 2 64K ICMP packets, including
965 		 * sk_buff struct overhead.
966 		 */
967 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
968 	}
969 	return 0;
970 
971  fail:
972 	for (j = 0; j < i; j++)
973 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
974 	kfree(net->ipv6.icmp_sk);
975 	return err;
976 }
977 
978 static void __net_exit icmpv6_sk_exit(struct net *net)
979 {
980 	int i;
981 
982 	for_each_possible_cpu(i) {
983 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
984 	}
985 	kfree(net->ipv6.icmp_sk);
986 }
987 
988 static struct pernet_operations icmpv6_sk_ops = {
989 	.init = icmpv6_sk_init,
990 	.exit = icmpv6_sk_exit,
991 };
992 
993 int __init icmpv6_init(void)
994 {
995 	int err;
996 
997 	err = register_pernet_subsys(&icmpv6_sk_ops);
998 	if (err < 0)
999 		return err;
1000 
1001 	err = -EAGAIN;
1002 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1003 		goto fail;
1004 
1005 	err = inet6_register_icmp_sender(icmp6_send);
1006 	if (err)
1007 		goto sender_reg_err;
1008 	return 0;
1009 
1010 sender_reg_err:
1011 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1012 fail:
1013 	pr_err("Failed to register ICMP6 protocol\n");
1014 	unregister_pernet_subsys(&icmpv6_sk_ops);
1015 	return err;
1016 }
1017 
1018 void icmpv6_cleanup(void)
1019 {
1020 	inet6_unregister_icmp_sender(icmp6_send);
1021 	unregister_pernet_subsys(&icmpv6_sk_ops);
1022 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1023 }
1024 
1025 
/*
 * Destination Unreachable translation table used by
 * icmpv6_err_convert(), indexed by the ICMPv6 code: the errno to
 * report and whether the error is fatal.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,  .fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,       .fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH, .fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH, .fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED, .fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,       .fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,       .fatal = 1 },	/* REJECT_ROUTE */
};
1059 
1060 int icmpv6_err_convert(u8 type, u8 code, int *err)
1061 {
1062 	int fatal = 0;
1063 
1064 	*err = EPROTO;
1065 
1066 	switch (type) {
1067 	case ICMPV6_DEST_UNREACH:
1068 		fatal = 1;
1069 		if (code < ARRAY_SIZE(tab_unreach)) {
1070 			*err  = tab_unreach[code].err;
1071 			fatal = tab_unreach[code].fatal;
1072 		}
1073 		break;
1074 
1075 	case ICMPV6_PKT_TOOBIG:
1076 		*err = EMSGSIZE;
1077 		break;
1078 
1079 	case ICMPV6_PARAMPROB:
1080 		*err = EPROTO;
1081 		fatal = 1;
1082 		break;
1083 
1084 	case ICMPV6_TIME_EXCEED:
1085 		*err = EHOSTUNREACH;
1086 		break;
1087 	}
1088 
1089 	return fatal;
1090 }
1091 EXPORT_SYMBOL(icmpv6_err_convert);
1092 
1093 #ifdef CONFIG_SYSCTL
1094 static struct ctl_table ipv6_icmp_table_template[] = {
1095 	{
1096 		.procname	= "ratelimit",
1097 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1098 		.maxlen		= sizeof(int),
1099 		.mode		= 0644,
1100 		.proc_handler	= proc_dointvec_ms_jiffies,
1101 	},
1102 	{ },
1103 };
1104 
1105 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1106 {
1107 	struct ctl_table *table;
1108 
1109 	table = kmemdup(ipv6_icmp_table_template,
1110 			sizeof(ipv6_icmp_table_template),
1111 			GFP_KERNEL);
1112 
1113 	if (table)
1114 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1115 
1116 	return table;
1117 }
1118 #endif
1119