xref: /openbmc/linux/net/ipv6/icmp.c (revision 79dc7e3f)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <asm/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, 0, 0);
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0);
98 
99 	if (!(type & ICMPV6_INFOMSG_MASK))
100 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
101 			ping_err(skb, offset, ntohl(info));
102 }
103 
104 static int icmpv6_rcv(struct sk_buff *skb);
105 
106 static const struct inet6_protocol icmpv6_protocol = {
107 	.handler	=	icmpv6_rcv,
108 	.err_handler	=	icmpv6_err,
109 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
110 };
111 
112 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113 {
114 	struct sock *sk;
115 
116 	local_bh_disable();
117 
118 	sk = icmpv6_sk(net);
119 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
120 		/* This can happen if the output path (f.e. SIT or
121 		 * ip6ip6 tunnel) signals dst_link_failure() for an
122 		 * outgoing ICMP6 packet.
123 		 */
124 		local_bh_enable();
125 		return NULL;
126 	}
127 	return sk;
128 }
129 
130 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
131 {
132 	spin_unlock_bh(&sk->sk_lock.slock);
133 }
134 
135 /*
136  * Figure out, may we reply to this packet with icmp error.
137  *
138  * We do not reply, if:
139  *	- it was icmp error message.
140  *	- it is truncated, so that it is known, that protocol is ICMPV6
141  *	  (i.e. in the middle of some exthdr)
142  *
143  *	--ANK (980726)
144  */
145 
146 static bool is_ineligible(const struct sk_buff *skb)
147 {
148 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
149 	int len = skb->len - ptr;
150 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
151 	__be16 frag_off;
152 
153 	if (len < 0)
154 		return true;
155 
156 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
157 	if (ptr < 0)
158 		return false;
159 	if (nexthdr == IPPROTO_ICMPV6) {
160 		u8 _type, *tp;
161 		tp = skb_header_pointer(skb,
162 			ptr+offsetof(struct icmp6hdr, icmp6_type),
163 			sizeof(_type), &_type);
164 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
165 			return true;
166 	}
167 	return false;
168 }
169 
170 /*
171  * Check the ICMP output rate limit
172  */
173 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
174 			       struct flowi6 *fl6)
175 {
176 	struct net *net = sock_net(sk);
177 	struct dst_entry *dst;
178 	bool res = false;
179 
180 	/* Informational messages are not limited. */
181 	if (type & ICMPV6_INFOMSG_MASK)
182 		return true;
183 
184 	/* Do not limit pmtu discovery, it would break it. */
185 	if (type == ICMPV6_PKT_TOOBIG)
186 		return true;
187 
188 	/*
189 	 * Look up the output route.
190 	 * XXX: perhaps the expire for routing entries cloned by
191 	 * this lookup should be more aggressive (not longer than timeout).
192 	 */
193 	dst = ip6_route_output(net, sk, fl6);
194 	if (dst->error) {
195 		IP6_INC_STATS(net, ip6_dst_idev(dst),
196 			      IPSTATS_MIB_OUTNOROUTES);
197 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
198 		res = true;
199 	} else {
200 		struct rt6_info *rt = (struct rt6_info *)dst;
201 		int tmo = net->ipv6.sysctl.icmpv6_time;
202 
203 		/* Give more bandwidth to wider prefixes. */
204 		if (rt->rt6i_dst.plen < 128)
205 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
206 
207 		if (icmp_global_allow()) {
208 			struct inet_peer *peer;
209 
210 			peer = inet_getpeer_v6(net->ipv6.peers,
211 					       &fl6->daddr, 1);
212 			res = inet_peer_xrlim_allow(peer, tmo);
213 			if (peer)
214 				inet_putpeer(peer);
215 		}
216 	}
217 	dst_release(dst);
218 	return res;
219 }
220 
221 /*
222  *	an inline helper for the "simple" if statement below
223  *	checks if parameter problem report is caused by an
224  *	unrecognized IPv6 option that has the Option Type
225  *	highest-order two bits set to 10
226  */
227 
228 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
229 {
230 	u8 _optval, *op;
231 
232 	offset += skb_network_offset(skb);
233 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
234 	if (!op)
235 		return true;
236 	return (*op & 0xC0) == 0x80;
237 }
238 
239 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
240 			       struct icmp6hdr *thdr, int len)
241 {
242 	struct sk_buff *skb;
243 	struct icmp6hdr *icmp6h;
244 	int err = 0;
245 
246 	skb = skb_peek(&sk->sk_write_queue);
247 	if (!skb)
248 		goto out;
249 
250 	icmp6h = icmp6_hdr(skb);
251 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
252 	icmp6h->icmp6_cksum = 0;
253 
254 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
255 		skb->csum = csum_partial(icmp6h,
256 					sizeof(struct icmp6hdr), skb->csum);
257 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
258 						      &fl6->daddr,
259 						      len, fl6->flowi6_proto,
260 						      skb->csum);
261 	} else {
262 		__wsum tmp_csum = 0;
263 
264 		skb_queue_walk(&sk->sk_write_queue, skb) {
265 			tmp_csum = csum_add(tmp_csum, skb->csum);
266 		}
267 
268 		tmp_csum = csum_partial(icmp6h,
269 					sizeof(struct icmp6hdr), tmp_csum);
270 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
271 						      &fl6->daddr,
272 						      len, fl6->flowi6_proto,
273 						      tmp_csum);
274 	}
275 	ip6_push_pending_frames(sk);
276 out:
277 	return err;
278 }
279 
280 struct icmpv6_msg {
281 	struct sk_buff	*skb;
282 	int		offset;
283 	uint8_t		type;
284 };
285 
286 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
287 {
288 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
289 	struct sk_buff *org_skb = msg->skb;
290 	__wsum csum = 0;
291 
292 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
293 				      to, len, csum);
294 	skb->csum = csum_block_add(skb->csum, csum, odd);
295 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
296 		nf_ct_attach(skb, org_skb);
297 	return 0;
298 }
299 
300 #if IS_ENABLED(CONFIG_IPV6_MIP6)
301 static void mip6_addr_swap(struct sk_buff *skb)
302 {
303 	struct ipv6hdr *iph = ipv6_hdr(skb);
304 	struct inet6_skb_parm *opt = IP6CB(skb);
305 	struct ipv6_destopt_hao *hao;
306 	struct in6_addr tmp;
307 	int off;
308 
309 	if (opt->dsthao) {
310 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
311 		if (likely(off >= 0)) {
312 			hao = (struct ipv6_destopt_hao *)
313 					(skb_network_header(skb) + off);
314 			tmp = iph->saddr;
315 			iph->saddr = hao->addr;
316 			hao->addr = tmp;
317 		}
318 	}
319 }
320 #else
321 static inline void mip6_addr_swap(struct sk_buff *skb) {}
322 #endif
323 
324 static struct dst_entry *icmpv6_route_lookup(struct net *net,
325 					     struct sk_buff *skb,
326 					     struct sock *sk,
327 					     struct flowi6 *fl6)
328 {
329 	struct dst_entry *dst, *dst2;
330 	struct flowi6 fl2;
331 	int err;
332 
333 	err = ip6_dst_lookup(net, sk, &dst, fl6);
334 	if (err)
335 		return ERR_PTR(err);
336 
337 	/*
338 	 * We won't send icmp if the destination is known
339 	 * anycast.
340 	 */
341 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
342 		net_dbg_ratelimited("icmp6_send: acast source\n");
343 		dst_release(dst);
344 		return ERR_PTR(-EINVAL);
345 	}
346 
347 	/* No need to clone since we're just using its address. */
348 	dst2 = dst;
349 
350 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
351 	if (!IS_ERR(dst)) {
352 		if (dst != dst2)
353 			return dst;
354 	} else {
355 		if (PTR_ERR(dst) == -EPERM)
356 			dst = NULL;
357 		else
358 			return dst;
359 	}
360 
361 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
362 	if (err)
363 		goto relookup_failed;
364 
365 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
366 	if (err)
367 		goto relookup_failed;
368 
369 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
370 	if (!IS_ERR(dst2)) {
371 		dst_release(dst);
372 		dst = dst2;
373 	} else {
374 		err = PTR_ERR(dst2);
375 		if (err == -EPERM) {
376 			dst_release(dst);
377 			return dst2;
378 		} else
379 			goto relookup_failed;
380 	}
381 
382 relookup_failed:
383 	if (dst)
384 		return dst;
385 	return ERR_PTR(err);
386 }
387 
388 /*
389  *	Send an ICMP message in response to a packet in error
390  */
391 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
392 		       const struct in6_addr *force_saddr)
393 {
394 	struct net *net = dev_net(skb->dev);
395 	struct inet6_dev *idev = NULL;
396 	struct ipv6hdr *hdr = ipv6_hdr(skb);
397 	struct sock *sk;
398 	struct ipv6_pinfo *np;
399 	const struct in6_addr *saddr = NULL;
400 	struct dst_entry *dst;
401 	struct icmp6hdr tmp_hdr;
402 	struct flowi6 fl6;
403 	struct icmpv6_msg msg;
404 	struct sockcm_cookie sockc_unused = {0};
405 	struct ipcm6_cookie ipc6;
406 	int iif = 0;
407 	int addr_type = 0;
408 	int len;
409 	int err = 0;
410 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
411 
412 	if ((u8 *)hdr < skb->head ||
413 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
414 		return;
415 
416 	/*
417 	 *	Make sure we respect the rules
418 	 *	i.e. RFC 1885 2.4(e)
419 	 *	Rule (e.1) is enforced by not using icmp6_send
420 	 *	in any code that processes icmp errors.
421 	 */
422 	addr_type = ipv6_addr_type(&hdr->daddr);
423 
424 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
425 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
426 		saddr = &hdr->daddr;
427 
428 	/*
429 	 *	Dest addr check
430 	 */
431 
432 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
433 		if (type != ICMPV6_PKT_TOOBIG &&
434 		    !(type == ICMPV6_PARAMPROB &&
435 		      code == ICMPV6_UNK_OPTION &&
436 		      (opt_unrec(skb, info))))
437 			return;
438 
439 		saddr = NULL;
440 	}
441 
442 	addr_type = ipv6_addr_type(&hdr->saddr);
443 
444 	/*
445 	 *	Source addr check
446 	 */
447 
448 	if (__ipv6_addr_needs_scope_id(addr_type))
449 		iif = skb->dev->ifindex;
450 	else {
451 		dst = skb_dst(skb);
452 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
453 	}
454 
455 	/*
456 	 *	Must not send error if the source does not uniquely
457 	 *	identify a single node (RFC2463 Section 2.4).
458 	 *	We check unspecified / multicast addresses here,
459 	 *	and anycast addresses will be checked later.
460 	 */
461 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
462 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
463 				    &hdr->saddr, &hdr->daddr);
464 		return;
465 	}
466 
467 	/*
468 	 *	Never answer to a ICMP packet.
469 	 */
470 	if (is_ineligible(skb)) {
471 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
472 				    &hdr->saddr, &hdr->daddr);
473 		return;
474 	}
475 
476 	mip6_addr_swap(skb);
477 
478 	memset(&fl6, 0, sizeof(fl6));
479 	fl6.flowi6_proto = IPPROTO_ICMPV6;
480 	fl6.daddr = hdr->saddr;
481 	if (force_saddr)
482 		saddr = force_saddr;
483 	if (saddr)
484 		fl6.saddr = *saddr;
485 	fl6.flowi6_mark = mark;
486 	fl6.flowi6_oif = iif;
487 	fl6.fl6_icmp_type = type;
488 	fl6.fl6_icmp_code = code;
489 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
490 
491 	sk = icmpv6_xmit_lock(net);
492 	if (!sk)
493 		return;
494 	sk->sk_mark = mark;
495 	np = inet6_sk(sk);
496 
497 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
498 		goto out;
499 
500 	tmp_hdr.icmp6_type = type;
501 	tmp_hdr.icmp6_code = code;
502 	tmp_hdr.icmp6_cksum = 0;
503 	tmp_hdr.icmp6_pointer = htonl(info);
504 
505 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
506 		fl6.flowi6_oif = np->mcast_oif;
507 	else if (!fl6.flowi6_oif)
508 		fl6.flowi6_oif = np->ucast_oif;
509 
510 	ipc6.tclass = np->tclass;
511 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
512 
513 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
514 	if (IS_ERR(dst))
515 		goto out;
516 
517 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
518 	ipc6.dontfrag = np->dontfrag;
519 	ipc6.opt = NULL;
520 
521 	msg.skb = skb;
522 	msg.offset = skb_network_offset(skb);
523 	msg.type = type;
524 
525 	len = skb->len - msg.offset;
526 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
527 	if (len < 0) {
528 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
529 				    &hdr->saddr, &hdr->daddr);
530 		goto out_dst_release;
531 	}
532 
533 	rcu_read_lock();
534 	idev = __in6_dev_get(skb->dev);
535 
536 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
537 			      len + sizeof(struct icmp6hdr),
538 			      sizeof(struct icmp6hdr),
539 			      &ipc6, &fl6, (struct rt6_info *)dst,
540 			      MSG_DONTWAIT, &sockc_unused);
541 	if (err) {
542 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
543 		ip6_flush_pending_frames(sk);
544 	} else {
545 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
546 						 len + sizeof(struct icmp6hdr));
547 	}
548 	rcu_read_unlock();
549 out_dst_release:
550 	dst_release(dst);
551 out:
552 	icmpv6_xmit_unlock(sk);
553 }
554 
555 /* Slightly more convenient version of icmp6_send.
556  */
557 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
558 {
559 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
560 	kfree_skb(skb);
561 }
562 
563 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
564  * if sufficient data bytes are available
565  * @nhs is the size of the tunnel header(s) :
566  *  Either an IPv4 header for SIT encap
567  *         an IPv4 header + GRE header for GRE encap
568  */
569 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
570 			       unsigned int data_len)
571 {
572 	struct in6_addr temp_saddr;
573 	struct rt6_info *rt;
574 	struct sk_buff *skb2;
575 	u32 info = 0;
576 
577 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
578 		return 1;
579 
580 	/* RFC 4884 (partial) support for ICMP extensions */
581 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
582 		data_len = 0;
583 
584 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
585 
586 	if (!skb2)
587 		return 1;
588 
589 	skb_dst_drop(skb2);
590 	skb_pull(skb2, nhs);
591 	skb_reset_network_header(skb2);
592 
593 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
594 
595 	if (rt && rt->dst.dev)
596 		skb2->dev = rt->dst.dev;
597 
598 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
599 
600 	if (data_len) {
601 		/* RFC 4884 (partial) support :
602 		 * insert 0 padding at the end, before the extensions
603 		 */
604 		__skb_push(skb2, nhs);
605 		skb_reset_network_header(skb2);
606 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
607 		memset(skb2->data + data_len - nhs, 0, nhs);
608 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
609 		 * and stored in reserved[0]
610 		 */
611 		info = (data_len/8) << 24;
612 	}
613 	if (type == ICMP_TIME_EXCEEDED)
614 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
615 			   info, &temp_saddr);
616 	else
617 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
618 			   info, &temp_saddr);
619 	if (rt)
620 		ip6_rt_put(rt);
621 
622 	kfree_skb(skb2);
623 
624 	return 0;
625 }
626 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
627 
628 static void icmpv6_echo_reply(struct sk_buff *skb)
629 {
630 	struct net *net = dev_net(skb->dev);
631 	struct sock *sk;
632 	struct inet6_dev *idev;
633 	struct ipv6_pinfo *np;
634 	const struct in6_addr *saddr = NULL;
635 	struct icmp6hdr *icmph = icmp6_hdr(skb);
636 	struct icmp6hdr tmp_hdr;
637 	struct flowi6 fl6;
638 	struct icmpv6_msg msg;
639 	struct dst_entry *dst;
640 	struct ipcm6_cookie ipc6;
641 	int err = 0;
642 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
643 	struct sockcm_cookie sockc_unused = {0};
644 
645 	saddr = &ipv6_hdr(skb)->daddr;
646 
647 	if (!ipv6_unicast_destination(skb) &&
648 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
649 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
650 		saddr = NULL;
651 
652 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
653 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
654 
655 	memset(&fl6, 0, sizeof(fl6));
656 	fl6.flowi6_proto = IPPROTO_ICMPV6;
657 	fl6.daddr = ipv6_hdr(skb)->saddr;
658 	if (saddr)
659 		fl6.saddr = *saddr;
660 	fl6.flowi6_oif = skb->dev->ifindex;
661 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
662 	fl6.flowi6_mark = mark;
663 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
664 
665 	sk = icmpv6_xmit_lock(net);
666 	if (!sk)
667 		return;
668 	sk->sk_mark = mark;
669 	np = inet6_sk(sk);
670 
671 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
672 		fl6.flowi6_oif = np->mcast_oif;
673 	else if (!fl6.flowi6_oif)
674 		fl6.flowi6_oif = np->ucast_oif;
675 
676 	err = ip6_dst_lookup(net, sk, &dst, &fl6);
677 	if (err)
678 		goto out;
679 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
680 	if (IS_ERR(dst))
681 		goto out;
682 
683 	idev = __in6_dev_get(skb->dev);
684 
685 	msg.skb = skb;
686 	msg.offset = 0;
687 	msg.type = ICMPV6_ECHO_REPLY;
688 
689 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
690 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
691 	ipc6.dontfrag = np->dontfrag;
692 	ipc6.opt = NULL;
693 
694 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
695 				sizeof(struct icmp6hdr), &ipc6, &fl6,
696 				(struct rt6_info *)dst, MSG_DONTWAIT,
697 				&sockc_unused);
698 
699 	if (err) {
700 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
701 		ip6_flush_pending_frames(sk);
702 	} else {
703 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
704 						 skb->len + sizeof(struct icmp6hdr));
705 	}
706 	dst_release(dst);
707 out:
708 	icmpv6_xmit_unlock(sk);
709 }
710 
711 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
712 {
713 	const struct inet6_protocol *ipprot;
714 	int inner_offset;
715 	__be16 frag_off;
716 	u8 nexthdr;
717 	struct net *net = dev_net(skb->dev);
718 
719 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
720 		goto out;
721 
722 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
723 	if (ipv6_ext_hdr(nexthdr)) {
724 		/* now skip over extension headers */
725 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
726 						&nexthdr, &frag_off);
727 		if (inner_offset < 0)
728 			goto out;
729 	} else {
730 		inner_offset = sizeof(struct ipv6hdr);
731 	}
732 
733 	/* Checkin header including 8 bytes of inner protocol header. */
734 	if (!pskb_may_pull(skb, inner_offset+8))
735 		goto out;
736 
737 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
738 	   Without this we will not able f.e. to make source routed
739 	   pmtu discovery.
740 	   Corresponding argument (opt) to notifiers is already added.
741 	   --ANK (980726)
742 	 */
743 
744 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
745 	if (ipprot && ipprot->err_handler)
746 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
747 
748 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
749 	return;
750 
751 out:
752 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
753 }
754 
755 /*
756  *	Handle icmp messages
757  */
758 
759 static int icmpv6_rcv(struct sk_buff *skb)
760 {
761 	struct net_device *dev = skb->dev;
762 	struct inet6_dev *idev = __in6_dev_get(dev);
763 	const struct in6_addr *saddr, *daddr;
764 	struct icmp6hdr *hdr;
765 	u8 type;
766 	bool success = false;
767 
768 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
769 		struct sec_path *sp = skb_sec_path(skb);
770 		int nh;
771 
772 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
773 				 XFRM_STATE_ICMP))
774 			goto drop_no_count;
775 
776 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
777 			goto drop_no_count;
778 
779 		nh = skb_network_offset(skb);
780 		skb_set_network_header(skb, sizeof(*hdr));
781 
782 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
783 			goto drop_no_count;
784 
785 		skb_set_network_header(skb, nh);
786 	}
787 
788 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
789 
790 	saddr = &ipv6_hdr(skb)->saddr;
791 	daddr = &ipv6_hdr(skb)->daddr;
792 
793 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
794 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
795 				    saddr, daddr);
796 		goto csum_error;
797 	}
798 
799 	if (!pskb_pull(skb, sizeof(*hdr)))
800 		goto discard_it;
801 
802 	hdr = icmp6_hdr(skb);
803 
804 	type = hdr->icmp6_type;
805 
806 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
807 
808 	switch (type) {
809 	case ICMPV6_ECHO_REQUEST:
810 		icmpv6_echo_reply(skb);
811 		break;
812 
813 	case ICMPV6_ECHO_REPLY:
814 		success = ping_rcv(skb);
815 		break;
816 
817 	case ICMPV6_PKT_TOOBIG:
818 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
819 		   standard destination cache. Seems, only "advanced"
820 		   destination cache will allow to solve this problem
821 		   --ANK (980726)
822 		 */
823 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
824 			goto discard_it;
825 		hdr = icmp6_hdr(skb);
826 
827 		/*
828 		 *	Drop through to notify
829 		 */
830 
831 	case ICMPV6_DEST_UNREACH:
832 	case ICMPV6_TIME_EXCEED:
833 	case ICMPV6_PARAMPROB:
834 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
835 		break;
836 
837 	case NDISC_ROUTER_SOLICITATION:
838 	case NDISC_ROUTER_ADVERTISEMENT:
839 	case NDISC_NEIGHBOUR_SOLICITATION:
840 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
841 	case NDISC_REDIRECT:
842 		ndisc_rcv(skb);
843 		break;
844 
845 	case ICMPV6_MGM_QUERY:
846 		igmp6_event_query(skb);
847 		break;
848 
849 	case ICMPV6_MGM_REPORT:
850 		igmp6_event_report(skb);
851 		break;
852 
853 	case ICMPV6_MGM_REDUCTION:
854 	case ICMPV6_NI_QUERY:
855 	case ICMPV6_NI_REPLY:
856 	case ICMPV6_MLD2_REPORT:
857 	case ICMPV6_DHAAD_REQUEST:
858 	case ICMPV6_DHAAD_REPLY:
859 	case ICMPV6_MOBILE_PREFIX_SOL:
860 	case ICMPV6_MOBILE_PREFIX_ADV:
861 		break;
862 
863 	default:
864 		/* informational */
865 		if (type & ICMPV6_INFOMSG_MASK)
866 			break;
867 
868 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
869 				    saddr, daddr);
870 
871 		/*
872 		 * error of unknown type.
873 		 * must pass to upper level
874 		 */
875 
876 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
877 	}
878 
879 	/* until the v6 path can be better sorted assume failure and
880 	 * preserve the status quo behaviour for the rest of the paths to here
881 	 */
882 	if (success)
883 		consume_skb(skb);
884 	else
885 		kfree_skb(skb);
886 
887 	return 0;
888 
889 csum_error:
890 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
891 discard_it:
892 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
893 drop_no_count:
894 	kfree_skb(skb);
895 	return 0;
896 }
897 
898 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
899 		      u8 type,
900 		      const struct in6_addr *saddr,
901 		      const struct in6_addr *daddr,
902 		      int oif)
903 {
904 	memset(fl6, 0, sizeof(*fl6));
905 	fl6->saddr = *saddr;
906 	fl6->daddr = *daddr;
907 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
908 	fl6->fl6_icmp_type	= type;
909 	fl6->fl6_icmp_code	= 0;
910 	fl6->flowi6_oif		= oif;
911 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
912 }
913 
914 static int __net_init icmpv6_sk_init(struct net *net)
915 {
916 	struct sock *sk;
917 	int err, i, j;
918 
919 	net->ipv6.icmp_sk =
920 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
921 	if (!net->ipv6.icmp_sk)
922 		return -ENOMEM;
923 
924 	for_each_possible_cpu(i) {
925 		err = inet_ctl_sock_create(&sk, PF_INET6,
926 					   SOCK_RAW, IPPROTO_ICMPV6, net);
927 		if (err < 0) {
928 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
929 			       err);
930 			goto fail;
931 		}
932 
933 		net->ipv6.icmp_sk[i] = sk;
934 
935 		/* Enough space for 2 64K ICMP packets, including
936 		 * sk_buff struct overhead.
937 		 */
938 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
939 	}
940 	return 0;
941 
942  fail:
943 	for (j = 0; j < i; j++)
944 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
945 	kfree(net->ipv6.icmp_sk);
946 	return err;
947 }
948 
949 static void __net_exit icmpv6_sk_exit(struct net *net)
950 {
951 	int i;
952 
953 	for_each_possible_cpu(i) {
954 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
955 	}
956 	kfree(net->ipv6.icmp_sk);
957 }
958 
959 static struct pernet_operations icmpv6_sk_ops = {
960 	.init = icmpv6_sk_init,
961 	.exit = icmpv6_sk_exit,
962 };
963 
964 int __init icmpv6_init(void)
965 {
966 	int err;
967 
968 	err = register_pernet_subsys(&icmpv6_sk_ops);
969 	if (err < 0)
970 		return err;
971 
972 	err = -EAGAIN;
973 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
974 		goto fail;
975 
976 	err = inet6_register_icmp_sender(icmp6_send);
977 	if (err)
978 		goto sender_reg_err;
979 	return 0;
980 
981 sender_reg_err:
982 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
983 fail:
984 	pr_err("Failed to register ICMP6 protocol\n");
985 	unregister_pernet_subsys(&icmpv6_sk_ops);
986 	return err;
987 }
988 
989 void icmpv6_cleanup(void)
990 {
991 	inet6_unregister_icmp_sender(icmp6_send);
992 	unregister_pernet_subsys(&icmpv6_sk_ops);
993 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
994 }
995 
996 
/*
 * Translation of Destination Unreachable codes, indexed by ICMPv6 code:
 * the errno to report and whether the error is fatal.  Used by
 * icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	/* NOROUTE */
	{ .err = ENETUNREACH,	.fatal = 0 },
	/* ADM_PROHIBITED */
	{ .err = EACCES,	.fatal = 1 },
	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },
	/* ADDR_UNREACH */
	{ .err = EHOSTUNREACH,	.fatal = 0 },
	/* PORT_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },
	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },
	/* REJECT_ROUTE */
	{ .err = EACCES,	.fatal = 1 },
};
1030 
1031 int icmpv6_err_convert(u8 type, u8 code, int *err)
1032 {
1033 	int fatal = 0;
1034 
1035 	*err = EPROTO;
1036 
1037 	switch (type) {
1038 	case ICMPV6_DEST_UNREACH:
1039 		fatal = 1;
1040 		if (code < ARRAY_SIZE(tab_unreach)) {
1041 			*err  = tab_unreach[code].err;
1042 			fatal = tab_unreach[code].fatal;
1043 		}
1044 		break;
1045 
1046 	case ICMPV6_PKT_TOOBIG:
1047 		*err = EMSGSIZE;
1048 		break;
1049 
1050 	case ICMPV6_PARAMPROB:
1051 		*err = EPROTO;
1052 		fatal = 1;
1053 		break;
1054 
1055 	case ICMPV6_TIME_EXCEED:
1056 		*err = EHOSTUNREACH;
1057 		break;
1058 	}
1059 
1060 	return fatal;
1061 }
1062 EXPORT_SYMBOL(icmpv6_err_convert);
1063 
#ifdef CONFIG_SYSCTL
/*
 * Template for the ICMPv6 sysctl table.  The single "ratelimit" entry
 * points at the initial namespace's icmpv6_time; per-namespace copies
 * are re-pointed by ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.mode		= 0644,
		.maxlen		= sizeof(int),
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ },
};

/*
 * Duplicate the template for @net and aim its "ratelimit" entry at that
 * namespace's own icmpv6_time.  Returns the new table, which the caller
 * owns, or NULL if the allocation fails.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table = kmemdup(ipv6_icmp_table_template,
					  sizeof(ipv6_icmp_table_template),
					  GFP_KERNEL);

	if (!table)
		return NULL;

	table[0].data = &net->ipv6.sysctl.icmpv6_time;
	return table;
}
#endif
1090