xref: /openbmc/linux/net/ipv6/icmp.c (revision e4781421e883340b796da5a724bda7226817990b)
1 /*
2  *	Internet Control Message Protocol (ICMPv6)
3  *	Linux INET6 implementation
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	Based on net/ipv4/icmp.c
9  *
10  *	RFC 1885
11  *
12  *	This program is free software; you can redistribute it and/or
13  *      modify it under the terms of the GNU General Public License
14  *      as published by the Free Software Foundation; either version
15  *      2 of the License, or (at your option) any later version.
16  */
17 
18 /*
19  *	Changes:
20  *
21  *	Andi Kleen		:	exception handling
22  *	Andi Kleen			add rate limits. never reply to a icmp.
23  *					add more length checks and other fixes.
24  *	yoshfuji		:	ensure to sent parameter problem for
25  *					fragments.
26  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
27  *	Randy Dunlap and
28  *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
29  *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
30  */
31 
32 #define pr_fmt(fmt) "IPv6: " fmt
33 
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
38 #include <linux/in.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
45 #include <linux/slab.h>
46 
47 #ifdef CONFIG_SYSCTL
48 #include <linux/sysctl.h>
49 #endif
50 
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
54 
55 #include <net/ip.h>
56 #include <net/sock.h>
57 
58 #include <net/ipv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/ping.h>
61 #include <net/protocol.h>
62 #include <net/raw.h>
63 #include <net/rawv6.h>
64 #include <net/transp_v6.h>
65 #include <net/ip6_route.h>
66 #include <net/addrconf.h>
67 #include <net/icmp.h>
68 #include <net/xfrm.h>
69 #include <net/inet_common.h>
70 #include <net/dsfield.h>
71 #include <net/l3mdev.h>
72 
73 #include <linux/uaccess.h>
74 
75 /*
76  *	The ICMP socket(s). This is the most convenient way to flow control
77  *	our ICMP output as well as maintain a clean interface throughout
78  *	all layers. All Socketless IP sends will soon be gone.
79  *
80  *	On SMP we have one ICMP socket per-cpu.
81  */
82 static inline struct sock *icmpv6_sk(struct net *net)
83 {
84 	return net->ipv6.icmp_sk[smp_processor_id()];
85 }
86 
87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 		       u8 type, u8 code, int offset, __be32 info)
89 {
90 	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
91 	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
92 	struct net *net = dev_net(skb->dev);
93 
94 	if (type == ICMPV6_PKT_TOOBIG)
95 		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
96 	else if (type == NDISC_REDIRECT)
97 		ip6_redirect(skb, net, skb->dev->ifindex, 0,
98 			     sock_net_uid(net, NULL));
99 
100 	if (!(type & ICMPV6_INFOMSG_MASK))
101 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
102 			ping_err(skb, offset, ntohl(info));
103 }
104 
105 static int icmpv6_rcv(struct sk_buff *skb);
106 
107 static const struct inet6_protocol icmpv6_protocol = {
108 	.handler	=	icmpv6_rcv,
109 	.err_handler	=	icmpv6_err,
110 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
111 };
112 
113 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
114 {
115 	struct sock *sk;
116 
117 	local_bh_disable();
118 
119 	sk = icmpv6_sk(net);
120 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
121 		/* This can happen if the output path (f.e. SIT or
122 		 * ip6ip6 tunnel) signals dst_link_failure() for an
123 		 * outgoing ICMP6 packet.
124 		 */
125 		local_bh_enable();
126 		return NULL;
127 	}
128 	return sk;
129 }
130 
131 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
132 {
133 	spin_unlock_bh(&sk->sk_lock.slock);
134 }
135 
136 /*
137  * Figure out, may we reply to this packet with icmp error.
138  *
139  * We do not reply, if:
140  *	- it was icmp error message.
141  *	- it is truncated, so that it is known, that protocol is ICMPV6
142  *	  (i.e. in the middle of some exthdr)
143  *
144  *	--ANK (980726)
145  */
146 
147 static bool is_ineligible(const struct sk_buff *skb)
148 {
149 	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
150 	int len = skb->len - ptr;
151 	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
152 	__be16 frag_off;
153 
154 	if (len < 0)
155 		return true;
156 
157 	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
158 	if (ptr < 0)
159 		return false;
160 	if (nexthdr == IPPROTO_ICMPV6) {
161 		u8 _type, *tp;
162 		tp = skb_header_pointer(skb,
163 			ptr+offsetof(struct icmp6hdr, icmp6_type),
164 			sizeof(_type), &_type);
165 		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
166 			return true;
167 	}
168 	return false;
169 }
170 
171 /*
172  * Check the ICMP output rate limit
173  */
174 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
175 			       struct flowi6 *fl6)
176 {
177 	struct net *net = sock_net(sk);
178 	struct dst_entry *dst;
179 	bool res = false;
180 
181 	/* Informational messages are not limited. */
182 	if (type & ICMPV6_INFOMSG_MASK)
183 		return true;
184 
185 	/* Do not limit pmtu discovery, it would break it. */
186 	if (type == ICMPV6_PKT_TOOBIG)
187 		return true;
188 
189 	/*
190 	 * Look up the output route.
191 	 * XXX: perhaps the expire for routing entries cloned by
192 	 * this lookup should be more aggressive (not longer than timeout).
193 	 */
194 	dst = ip6_route_output(net, sk, fl6);
195 	if (dst->error) {
196 		IP6_INC_STATS(net, ip6_dst_idev(dst),
197 			      IPSTATS_MIB_OUTNOROUTES);
198 	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
199 		res = true;
200 	} else {
201 		struct rt6_info *rt = (struct rt6_info *)dst;
202 		int tmo = net->ipv6.sysctl.icmpv6_time;
203 
204 		/* Give more bandwidth to wider prefixes. */
205 		if (rt->rt6i_dst.plen < 128)
206 			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
207 
208 		if (icmp_global_allow()) {
209 			struct inet_peer *peer;
210 
211 			peer = inet_getpeer_v6(net->ipv6.peers,
212 					       &fl6->daddr, 1);
213 			res = inet_peer_xrlim_allow(peer, tmo);
214 			if (peer)
215 				inet_putpeer(peer);
216 		}
217 	}
218 	dst_release(dst);
219 	return res;
220 }
221 
222 /*
223  *	an inline helper for the "simple" if statement below
224  *	checks if parameter problem report is caused by an
225  *	unrecognized IPv6 option that has the Option Type
226  *	highest-order two bits set to 10
227  */
228 
229 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
230 {
231 	u8 _optval, *op;
232 
233 	offset += skb_network_offset(skb);
234 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
235 	if (!op)
236 		return true;
237 	return (*op & 0xC0) == 0x80;
238 }
239 
240 int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
241 			       struct icmp6hdr *thdr, int len)
242 {
243 	struct sk_buff *skb;
244 	struct icmp6hdr *icmp6h;
245 	int err = 0;
246 
247 	skb = skb_peek(&sk->sk_write_queue);
248 	if (!skb)
249 		goto out;
250 
251 	icmp6h = icmp6_hdr(skb);
252 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
253 	icmp6h->icmp6_cksum = 0;
254 
255 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
256 		skb->csum = csum_partial(icmp6h,
257 					sizeof(struct icmp6hdr), skb->csum);
258 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
259 						      &fl6->daddr,
260 						      len, fl6->flowi6_proto,
261 						      skb->csum);
262 	} else {
263 		__wsum tmp_csum = 0;
264 
265 		skb_queue_walk(&sk->sk_write_queue, skb) {
266 			tmp_csum = csum_add(tmp_csum, skb->csum);
267 		}
268 
269 		tmp_csum = csum_partial(icmp6h,
270 					sizeof(struct icmp6hdr), tmp_csum);
271 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
272 						      &fl6->daddr,
273 						      len, fl6->flowi6_proto,
274 						      tmp_csum);
275 	}
276 	ip6_push_pending_frames(sk);
277 out:
278 	return err;
279 }
280 
281 struct icmpv6_msg {
282 	struct sk_buff	*skb;
283 	int		offset;
284 	uint8_t		type;
285 };
286 
287 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
288 {
289 	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
290 	struct sk_buff *org_skb = msg->skb;
291 	__wsum csum = 0;
292 
293 	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
294 				      to, len, csum);
295 	skb->csum = csum_block_add(skb->csum, csum, odd);
296 	if (!(msg->type & ICMPV6_INFOMSG_MASK))
297 		nf_ct_attach(skb, org_skb);
298 	return 0;
299 }
300 
301 #if IS_ENABLED(CONFIG_IPV6_MIP6)
302 static void mip6_addr_swap(struct sk_buff *skb)
303 {
304 	struct ipv6hdr *iph = ipv6_hdr(skb);
305 	struct inet6_skb_parm *opt = IP6CB(skb);
306 	struct ipv6_destopt_hao *hao;
307 	struct in6_addr tmp;
308 	int off;
309 
310 	if (opt->dsthao) {
311 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
312 		if (likely(off >= 0)) {
313 			hao = (struct ipv6_destopt_hao *)
314 					(skb_network_header(skb) + off);
315 			tmp = iph->saddr;
316 			iph->saddr = hao->addr;
317 			hao->addr = tmp;
318 		}
319 	}
320 }
321 #else
322 static inline void mip6_addr_swap(struct sk_buff *skb) {}
323 #endif
324 
325 static struct dst_entry *icmpv6_route_lookup(struct net *net,
326 					     struct sk_buff *skb,
327 					     struct sock *sk,
328 					     struct flowi6 *fl6)
329 {
330 	struct dst_entry *dst, *dst2;
331 	struct flowi6 fl2;
332 	int err;
333 
334 	err = ip6_dst_lookup(net, sk, &dst, fl6);
335 	if (err)
336 		return ERR_PTR(err);
337 
338 	/*
339 	 * We won't send icmp if the destination is known
340 	 * anycast.
341 	 */
342 	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
343 		net_dbg_ratelimited("icmp6_send: acast source\n");
344 		dst_release(dst);
345 		return ERR_PTR(-EINVAL);
346 	}
347 
348 	/* No need to clone since we're just using its address. */
349 	dst2 = dst;
350 
351 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
352 	if (!IS_ERR(dst)) {
353 		if (dst != dst2)
354 			return dst;
355 	} else {
356 		if (PTR_ERR(dst) == -EPERM)
357 			dst = NULL;
358 		else
359 			return dst;
360 	}
361 
362 	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
363 	if (err)
364 		goto relookup_failed;
365 
366 	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
367 	if (err)
368 		goto relookup_failed;
369 
370 	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
371 	if (!IS_ERR(dst2)) {
372 		dst_release(dst);
373 		dst = dst2;
374 	} else {
375 		err = PTR_ERR(dst2);
376 		if (err == -EPERM) {
377 			dst_release(dst);
378 			return dst2;
379 		} else
380 			goto relookup_failed;
381 	}
382 
383 relookup_failed:
384 	if (dst)
385 		return dst;
386 	return ERR_PTR(err);
387 }
388 
389 /*
390  *	Send an ICMP message in response to a packet in error
391  */
392 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
393 		       const struct in6_addr *force_saddr)
394 {
395 	struct net *net = dev_net(skb->dev);
396 	struct inet6_dev *idev = NULL;
397 	struct ipv6hdr *hdr = ipv6_hdr(skb);
398 	struct sock *sk;
399 	struct ipv6_pinfo *np;
400 	const struct in6_addr *saddr = NULL;
401 	struct dst_entry *dst;
402 	struct icmp6hdr tmp_hdr;
403 	struct flowi6 fl6;
404 	struct icmpv6_msg msg;
405 	struct sockcm_cookie sockc_unused = {0};
406 	struct ipcm6_cookie ipc6;
407 	int iif = 0;
408 	int addr_type = 0;
409 	int len;
410 	int err = 0;
411 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
412 
413 	if ((u8 *)hdr < skb->head ||
414 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
415 		return;
416 
417 	/*
418 	 *	Make sure we respect the rules
419 	 *	i.e. RFC 1885 2.4(e)
420 	 *	Rule (e.1) is enforced by not using icmp6_send
421 	 *	in any code that processes icmp errors.
422 	 */
423 	addr_type = ipv6_addr_type(&hdr->daddr);
424 
425 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
426 	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
427 		saddr = &hdr->daddr;
428 
429 	/*
430 	 *	Dest addr check
431 	 */
432 
433 	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
434 		if (type != ICMPV6_PKT_TOOBIG &&
435 		    !(type == ICMPV6_PARAMPROB &&
436 		      code == ICMPV6_UNK_OPTION &&
437 		      (opt_unrec(skb, info))))
438 			return;
439 
440 		saddr = NULL;
441 	}
442 
443 	addr_type = ipv6_addr_type(&hdr->saddr);
444 
445 	/*
446 	 *	Source addr check
447 	 */
448 
449 	if (__ipv6_addr_needs_scope_id(addr_type))
450 		iif = skb->dev->ifindex;
451 	else {
452 		dst = skb_dst(skb);
453 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
454 	}
455 
456 	/*
457 	 *	Must not send error if the source does not uniquely
458 	 *	identify a single node (RFC2463 Section 2.4).
459 	 *	We check unspecified / multicast addresses here,
460 	 *	and anycast addresses will be checked later.
461 	 */
462 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
463 		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
464 				    &hdr->saddr, &hdr->daddr);
465 		return;
466 	}
467 
468 	/*
469 	 *	Never answer to a ICMP packet.
470 	 */
471 	if (is_ineligible(skb)) {
472 		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
473 				    &hdr->saddr, &hdr->daddr);
474 		return;
475 	}
476 
477 	mip6_addr_swap(skb);
478 
479 	memset(&fl6, 0, sizeof(fl6));
480 	fl6.flowi6_proto = IPPROTO_ICMPV6;
481 	fl6.daddr = hdr->saddr;
482 	if (force_saddr)
483 		saddr = force_saddr;
484 	if (saddr)
485 		fl6.saddr = *saddr;
486 	fl6.flowi6_mark = mark;
487 	fl6.flowi6_oif = iif;
488 	fl6.fl6_icmp_type = type;
489 	fl6.fl6_icmp_code = code;
490 	fl6.flowi6_uid = sock_net_uid(net, NULL);
491 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
492 
493 	sk = icmpv6_xmit_lock(net);
494 	if (!sk)
495 		return;
496 	sk->sk_mark = mark;
497 	np = inet6_sk(sk);
498 
499 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
500 		goto out;
501 
502 	tmp_hdr.icmp6_type = type;
503 	tmp_hdr.icmp6_code = code;
504 	tmp_hdr.icmp6_cksum = 0;
505 	tmp_hdr.icmp6_pointer = htonl(info);
506 
507 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
508 		fl6.flowi6_oif = np->mcast_oif;
509 	else if (!fl6.flowi6_oif)
510 		fl6.flowi6_oif = np->ucast_oif;
511 
512 	ipc6.tclass = np->tclass;
513 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
514 
515 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
516 	if (IS_ERR(dst))
517 		goto out;
518 
519 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
520 	ipc6.dontfrag = np->dontfrag;
521 	ipc6.opt = NULL;
522 
523 	msg.skb = skb;
524 	msg.offset = skb_network_offset(skb);
525 	msg.type = type;
526 
527 	len = skb->len - msg.offset;
528 	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
529 	if (len < 0) {
530 		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
531 				    &hdr->saddr, &hdr->daddr);
532 		goto out_dst_release;
533 	}
534 
535 	rcu_read_lock();
536 	idev = __in6_dev_get(skb->dev);
537 
538 	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
539 			      len + sizeof(struct icmp6hdr),
540 			      sizeof(struct icmp6hdr),
541 			      &ipc6, &fl6, (struct rt6_info *)dst,
542 			      MSG_DONTWAIT, &sockc_unused);
543 	if (err) {
544 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
545 		ip6_flush_pending_frames(sk);
546 	} else {
547 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
548 						 len + sizeof(struct icmp6hdr));
549 	}
550 	rcu_read_unlock();
551 out_dst_release:
552 	dst_release(dst);
553 out:
554 	icmpv6_xmit_unlock(sk);
555 }
556 
557 /* Slightly more convenient version of icmp6_send.
558  */
559 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
560 {
561 	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
562 	kfree_skb(skb);
563 }
564 
565 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
566  * if sufficient data bytes are available
567  * @nhs is the size of the tunnel header(s) :
568  *  Either an IPv4 header for SIT encap
569  *         an IPv4 header + GRE header for GRE encap
570  */
571 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
572 			       unsigned int data_len)
573 {
574 	struct in6_addr temp_saddr;
575 	struct rt6_info *rt;
576 	struct sk_buff *skb2;
577 	u32 info = 0;
578 
579 	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
580 		return 1;
581 
582 	/* RFC 4884 (partial) support for ICMP extensions */
583 	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
584 		data_len = 0;
585 
586 	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
587 
588 	if (!skb2)
589 		return 1;
590 
591 	skb_dst_drop(skb2);
592 	skb_pull(skb2, nhs);
593 	skb_reset_network_header(skb2);
594 
595 	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
596 
597 	if (rt && rt->dst.dev)
598 		skb2->dev = rt->dst.dev;
599 
600 	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
601 
602 	if (data_len) {
603 		/* RFC 4884 (partial) support :
604 		 * insert 0 padding at the end, before the extensions
605 		 */
606 		__skb_push(skb2, nhs);
607 		skb_reset_network_header(skb2);
608 		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
609 		memset(skb2->data + data_len - nhs, 0, nhs);
610 		/* RFC 4884 4.5 : Length is measured in 64-bit words,
611 		 * and stored in reserved[0]
612 		 */
613 		info = (data_len/8) << 24;
614 	}
615 	if (type == ICMP_TIME_EXCEEDED)
616 		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
617 			   info, &temp_saddr);
618 	else
619 		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
620 			   info, &temp_saddr);
621 	if (rt)
622 		ip6_rt_put(rt);
623 
624 	kfree_skb(skb2);
625 
626 	return 0;
627 }
628 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
629 
630 static void icmpv6_echo_reply(struct sk_buff *skb)
631 {
632 	struct net *net = dev_net(skb->dev);
633 	struct sock *sk;
634 	struct inet6_dev *idev;
635 	struct ipv6_pinfo *np;
636 	const struct in6_addr *saddr = NULL;
637 	struct icmp6hdr *icmph = icmp6_hdr(skb);
638 	struct icmp6hdr tmp_hdr;
639 	struct flowi6 fl6;
640 	struct icmpv6_msg msg;
641 	struct dst_entry *dst;
642 	struct ipcm6_cookie ipc6;
643 	int err = 0;
644 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
645 	struct sockcm_cookie sockc_unused = {0};
646 
647 	saddr = &ipv6_hdr(skb)->daddr;
648 
649 	if (!ipv6_unicast_destination(skb) &&
650 	    !(net->ipv6.sysctl.anycast_src_echo_reply &&
651 	      ipv6_anycast_destination(skb_dst(skb), saddr)))
652 		saddr = NULL;
653 
654 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
655 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
656 
657 	memset(&fl6, 0, sizeof(fl6));
658 	fl6.flowi6_proto = IPPROTO_ICMPV6;
659 	fl6.daddr = ipv6_hdr(skb)->saddr;
660 	if (saddr)
661 		fl6.saddr = *saddr;
662 	fl6.flowi6_oif = skb->dev->ifindex;
663 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
664 	fl6.flowi6_mark = mark;
665 	fl6.flowi6_uid = sock_net_uid(net, NULL);
666 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
667 
668 	sk = icmpv6_xmit_lock(net);
669 	if (!sk)
670 		return;
671 	sk->sk_mark = mark;
672 	np = inet6_sk(sk);
673 
674 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
675 		fl6.flowi6_oif = np->mcast_oif;
676 	else if (!fl6.flowi6_oif)
677 		fl6.flowi6_oif = np->ucast_oif;
678 
679 	err = ip6_dst_lookup(net, sk, &dst, &fl6);
680 	if (err)
681 		goto out;
682 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
683 	if (IS_ERR(dst))
684 		goto out;
685 
686 	idev = __in6_dev_get(skb->dev);
687 
688 	msg.skb = skb;
689 	msg.offset = 0;
690 	msg.type = ICMPV6_ECHO_REPLY;
691 
692 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
693 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
694 	ipc6.dontfrag = np->dontfrag;
695 	ipc6.opt = NULL;
696 
697 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
698 				sizeof(struct icmp6hdr), &ipc6, &fl6,
699 				(struct rt6_info *)dst, MSG_DONTWAIT,
700 				&sockc_unused);
701 
702 	if (err) {
703 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
704 		ip6_flush_pending_frames(sk);
705 	} else {
706 		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
707 						 skb->len + sizeof(struct icmp6hdr));
708 	}
709 	dst_release(dst);
710 out:
711 	icmpv6_xmit_unlock(sk);
712 }
713 
714 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
715 {
716 	const struct inet6_protocol *ipprot;
717 	int inner_offset;
718 	__be16 frag_off;
719 	u8 nexthdr;
720 	struct net *net = dev_net(skb->dev);
721 
722 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
723 		goto out;
724 
725 	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
726 	if (ipv6_ext_hdr(nexthdr)) {
727 		/* now skip over extension headers */
728 		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
729 						&nexthdr, &frag_off);
730 		if (inner_offset < 0)
731 			goto out;
732 	} else {
733 		inner_offset = sizeof(struct ipv6hdr);
734 	}
735 
736 	/* Checkin header including 8 bytes of inner protocol header. */
737 	if (!pskb_may_pull(skb, inner_offset+8))
738 		goto out;
739 
740 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
741 	   Without this we will not able f.e. to make source routed
742 	   pmtu discovery.
743 	   Corresponding argument (opt) to notifiers is already added.
744 	   --ANK (980726)
745 	 */
746 
747 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
748 	if (ipprot && ipprot->err_handler)
749 		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
750 
751 	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
752 	return;
753 
754 out:
755 	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
756 }
757 
758 /*
759  *	Handle icmp messages
760  */
761 
762 static int icmpv6_rcv(struct sk_buff *skb)
763 {
764 	struct net_device *dev = skb->dev;
765 	struct inet6_dev *idev = __in6_dev_get(dev);
766 	const struct in6_addr *saddr, *daddr;
767 	struct icmp6hdr *hdr;
768 	u8 type;
769 	bool success = false;
770 
771 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
772 		struct sec_path *sp = skb_sec_path(skb);
773 		int nh;
774 
775 		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
776 				 XFRM_STATE_ICMP))
777 			goto drop_no_count;
778 
779 		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
780 			goto drop_no_count;
781 
782 		nh = skb_network_offset(skb);
783 		skb_set_network_header(skb, sizeof(*hdr));
784 
785 		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
786 			goto drop_no_count;
787 
788 		skb_set_network_header(skb, nh);
789 	}
790 
791 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
792 
793 	saddr = &ipv6_hdr(skb)->saddr;
794 	daddr = &ipv6_hdr(skb)->daddr;
795 
796 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
797 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
798 				    saddr, daddr);
799 		goto csum_error;
800 	}
801 
802 	if (!pskb_pull(skb, sizeof(*hdr)))
803 		goto discard_it;
804 
805 	hdr = icmp6_hdr(skb);
806 
807 	type = hdr->icmp6_type;
808 
809 	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
810 
811 	switch (type) {
812 	case ICMPV6_ECHO_REQUEST:
813 		icmpv6_echo_reply(skb);
814 		break;
815 
816 	case ICMPV6_ECHO_REPLY:
817 		success = ping_rcv(skb);
818 		break;
819 
820 	case ICMPV6_PKT_TOOBIG:
821 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
822 		   standard destination cache. Seems, only "advanced"
823 		   destination cache will allow to solve this problem
824 		   --ANK (980726)
825 		 */
826 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
827 			goto discard_it;
828 		hdr = icmp6_hdr(skb);
829 
830 		/*
831 		 *	Drop through to notify
832 		 */
833 
834 	case ICMPV6_DEST_UNREACH:
835 	case ICMPV6_TIME_EXCEED:
836 	case ICMPV6_PARAMPROB:
837 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
838 		break;
839 
840 	case NDISC_ROUTER_SOLICITATION:
841 	case NDISC_ROUTER_ADVERTISEMENT:
842 	case NDISC_NEIGHBOUR_SOLICITATION:
843 	case NDISC_NEIGHBOUR_ADVERTISEMENT:
844 	case NDISC_REDIRECT:
845 		ndisc_rcv(skb);
846 		break;
847 
848 	case ICMPV6_MGM_QUERY:
849 		igmp6_event_query(skb);
850 		break;
851 
852 	case ICMPV6_MGM_REPORT:
853 		igmp6_event_report(skb);
854 		break;
855 
856 	case ICMPV6_MGM_REDUCTION:
857 	case ICMPV6_NI_QUERY:
858 	case ICMPV6_NI_REPLY:
859 	case ICMPV6_MLD2_REPORT:
860 	case ICMPV6_DHAAD_REQUEST:
861 	case ICMPV6_DHAAD_REPLY:
862 	case ICMPV6_MOBILE_PREFIX_SOL:
863 	case ICMPV6_MOBILE_PREFIX_ADV:
864 		break;
865 
866 	default:
867 		/* informational */
868 		if (type & ICMPV6_INFOMSG_MASK)
869 			break;
870 
871 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
872 				    saddr, daddr);
873 
874 		/*
875 		 * error of unknown type.
876 		 * must pass to upper level
877 		 */
878 
879 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
880 	}
881 
882 	/* until the v6 path can be better sorted assume failure and
883 	 * preserve the status quo behaviour for the rest of the paths to here
884 	 */
885 	if (success)
886 		consume_skb(skb);
887 	else
888 		kfree_skb(skb);
889 
890 	return 0;
891 
892 csum_error:
893 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
894 discard_it:
895 	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
896 drop_no_count:
897 	kfree_skb(skb);
898 	return 0;
899 }
900 
901 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
902 		      u8 type,
903 		      const struct in6_addr *saddr,
904 		      const struct in6_addr *daddr,
905 		      int oif)
906 {
907 	memset(fl6, 0, sizeof(*fl6));
908 	fl6->saddr = *saddr;
909 	fl6->daddr = *daddr;
910 	fl6->flowi6_proto	= IPPROTO_ICMPV6;
911 	fl6->fl6_icmp_type	= type;
912 	fl6->fl6_icmp_code	= 0;
913 	fl6->flowi6_oif		= oif;
914 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
915 }
916 
917 static int __net_init icmpv6_sk_init(struct net *net)
918 {
919 	struct sock *sk;
920 	int err, i, j;
921 
922 	net->ipv6.icmp_sk =
923 		kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
924 	if (!net->ipv6.icmp_sk)
925 		return -ENOMEM;
926 
927 	for_each_possible_cpu(i) {
928 		err = inet_ctl_sock_create(&sk, PF_INET6,
929 					   SOCK_RAW, IPPROTO_ICMPV6, net);
930 		if (err < 0) {
931 			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
932 			       err);
933 			goto fail;
934 		}
935 
936 		net->ipv6.icmp_sk[i] = sk;
937 
938 		/* Enough space for 2 64K ICMP packets, including
939 		 * sk_buff struct overhead.
940 		 */
941 		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
942 	}
943 	return 0;
944 
945  fail:
946 	for (j = 0; j < i; j++)
947 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
948 	kfree(net->ipv6.icmp_sk);
949 	return err;
950 }
951 
952 static void __net_exit icmpv6_sk_exit(struct net *net)
953 {
954 	int i;
955 
956 	for_each_possible_cpu(i) {
957 		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
958 	}
959 	kfree(net->ipv6.icmp_sk);
960 }
961 
962 static struct pernet_operations icmpv6_sk_ops = {
963 	.init = icmpv6_sk_init,
964 	.exit = icmpv6_sk_exit,
965 };
966 
967 int __init icmpv6_init(void)
968 {
969 	int err;
970 
971 	err = register_pernet_subsys(&icmpv6_sk_ops);
972 	if (err < 0)
973 		return err;
974 
975 	err = -EAGAIN;
976 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
977 		goto fail;
978 
979 	err = inet6_register_icmp_sender(icmp6_send);
980 	if (err)
981 		goto sender_reg_err;
982 	return 0;
983 
984 sender_reg_err:
985 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
986 fail:
987 	pr_err("Failed to register ICMP6 protocol\n");
988 	unregister_pernet_subsys(&icmpv6_sk_ops);
989 	return err;
990 }
991 
992 void icmpv6_cleanup(void)
993 {
994 	inet6_unregister_icmp_sender(icmp6_send);
995 	unregister_pernet_subsys(&icmpv6_sk_ops);
996 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
997 }
998 
999 
/*
 * Destination Unreachable codes mapped to an errno value and to whether
 * the error is fatal.  The table is indexed by the ICMPv6 code, so the
 * order of the entries matters.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1033 
1034 int icmpv6_err_convert(u8 type, u8 code, int *err)
1035 {
1036 	int fatal = 0;
1037 
1038 	*err = EPROTO;
1039 
1040 	switch (type) {
1041 	case ICMPV6_DEST_UNREACH:
1042 		fatal = 1;
1043 		if (code < ARRAY_SIZE(tab_unreach)) {
1044 			*err  = tab_unreach[code].err;
1045 			fatal = tab_unreach[code].fatal;
1046 		}
1047 		break;
1048 
1049 	case ICMPV6_PKT_TOOBIG:
1050 		*err = EMSGSIZE;
1051 		break;
1052 
1053 	case ICMPV6_PARAMPROB:
1054 		*err = EPROTO;
1055 		fatal = 1;
1056 		break;
1057 
1058 	case ICMPV6_TIME_EXCEED:
1059 		*err = EHOSTUNREACH;
1060 		break;
1061 	}
1062 
1063 	return fatal;
1064 }
1065 EXPORT_SYMBOL(icmpv6_err_convert);
1066 
1067 #ifdef CONFIG_SYSCTL
1068 static struct ctl_table ipv6_icmp_table_template[] = {
1069 	{
1070 		.procname	= "ratelimit",
1071 		.data		= &init_net.ipv6.sysctl.icmpv6_time,
1072 		.maxlen		= sizeof(int),
1073 		.mode		= 0644,
1074 		.proc_handler	= proc_dointvec_ms_jiffies,
1075 	},
1076 	{ },
1077 };
1078 
1079 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1080 {
1081 	struct ctl_table *table;
1082 
1083 	table = kmemdup(ipv6_icmp_table_template,
1084 			sizeof(ipv6_icmp_table_template),
1085 			GFP_KERNEL);
1086 
1087 	if (table)
1088 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
1089 
1090 	return table;
1091 }
1092 #endif
1093