1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Internet Control Message Protocol (ICMPv6)
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on net/ipv4/icmp.c
10 *
11 * RFC 1885
12 */
13
14 /*
15 * Changes:
16 *
17 * Andi Kleen : exception handling
18 * Andi Kleen add rate limits. never reply to a icmp.
19 * add more length checks and other fixes.
20 * yoshfuji : ensure to sent parameter problem for
21 * fragments.
22 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
23 * Randy Dunlap and
24 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
25 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
26 */
27
28 #define pr_fmt(fmt) "IPv6: " fmt
29
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50
51 #include <net/ip.h>
52 #include <net/sock.h>
53
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69
70 #include <linux/uaccess.h>
71
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73
/* Error handler invoked when an ICMPv6 error arrives in response to a
 * packet sent from the ICMPv6 control socket (e.g. a ping we emitted):
 * update PMTU / redirect state and, for errors quoting one of our echo
 * requests, forward the error to the ping socket layer.
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6h = (struct icmp6hdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);

	switch (type) {
	case ICMPV6_PKT_TOOBIG:
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0,
				sock_net_uid(net, NULL));
		break;
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));
		break;
	}

	/* Only errors (not info messages) quoting an echo request concern
	 * the ping socket layer.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK) &&
	    icmp6h->icmp6_type == ICMPV6_ECHO_REQUEST)
		ping_err(skb, offset, ntohl(info));

	return 0;
}
93
static int icmpv6_rcv(struct sk_buff *skb);

/* inet6 protocol registration for IPPROTO_ICMPV6: no xfrm policy check
 * on input (handled manually in icmpv6_rcv) and no further protocol
 * demux after this handler.
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
101
/* Called with BH disabled */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	/* Per-CPU control socket; BH is disabled so we stay on this CPU. */
	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		/* trylock (not lock) avoids deadlocking against ourselves
		 * in that re-entrant case; caller must handle NULL.
		 */
		return NULL;
	}
	/* Bind the shared socket to the requesting netns for this send. */
	sock_net_set(sk, net);
	return sk;
}
118
/* Undo icmpv6_xmit_lock(): re-home the per-CPU socket to init_net and
 * release the socket spinlock.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}
124
/*
 *	Figure out, may we reply to this packet with icmp error.
 *
 *	We do not reply, if:
 *		- it was icmp error message.
 *		- it is truncated, so that it is known, that protocol is ICMPV6
 *		  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	/* Offset of the first byte past the fixed IPv6 header. */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	/* Walk the extension header chain to the upper-layer protocol. */
	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;	/* ICMPv6 error (or unreadable type) */
	}
	return false;
}
165
icmpv6_mask_allow(struct net * net,int type)166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168 if (type > ICMPV6_MSG_MAX)
169 return true;
170
171 /* Limit if icmp type is set in ratemask. */
172 if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 return true;
174
175 return false;
176 }
177
/* Global (cross-netns, shared with IPv4) token-bucket check. Returns
 * true when sending may proceed; *apply_ratelimit tells the caller
 * whether the per-host limit (icmpv6_xrlim_allow) must also be applied
 * and a token later consumed via icmp_global_consume().
 */
static bool icmpv6_global_allow(struct net *net, int type,
				bool *apply_ratelimit)
{
	/* Type exempt from limiting: allow without touching the bucket. */
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow()) {
		*apply_ratelimit = true;
		return true;
	}
	/* Counted in the shared (v4) ICMP MIB; the global limiter is
	 * common to IPv4 and IPv6.
	 */
	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
	return false;
}
191
/*
 * Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	/* icmpv6_global_allow() decided no per-host limiting is needed. */
	if (!apply_ratelimit)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* Per-destination token bucket kept on the inet_peer. */
		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	if (!res)
		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
				  ICMP6_MIB_RATELIMITHOST);
	else
		/* We will transmit: consume the global token reserved
		 * by icmpv6_global_allow().
		 */
		icmp_global_consume();
	dst_release(dst);
	return res;
}
238
icmpv6_rt_has_prefsrc(struct sock * sk,u8 type,struct flowi6 * fl6)239 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
240 struct flowi6 *fl6)
241 {
242 struct net *net = sock_net(sk);
243 struct dst_entry *dst;
244 bool res = false;
245
246 dst = ip6_route_output(net, sk, fl6);
247 if (!dst->error) {
248 struct rt6_info *rt = (struct rt6_info *)dst;
249 struct in6_addr prefsrc;
250
251 rt6_get_prefsrc(rt, &prefsrc);
252 res = !ipv6_addr_any(&prefsrc);
253 }
254 dst_release(dst);
255 return res;
256 }
257
258 /*
259 * an inline helper for the "simple" if statement below
260 * checks if parameter problem report is caused by an
261 * unrecognized IPv6 option that has the Option Type
262 * highest-order two bits set to 10
263 */
264
opt_unrec(struct sk_buff * skb,__u32 offset)265 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
266 {
267 u8 _optval, *op;
268
269 offset += skb_network_offset(skb);
270 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
271 if (!op)
272 return true;
273 return (*op & 0xC0) == 0x80;
274 }
275
/* Fill in the ICMPv6 header (@thdr) on the queued frames, compute the
 * checksum over all queued data plus the pseudo-header, and transmit
 * the pending frames. @len is the ICMPv6 payload length including the
 * icmp6hdr, used for the pseudo-header checksum.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	/* Checksum field must be zero while the sum is computed. */
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single frame: fold the header into its existing csum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple frames: accumulate each fragment's csum first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}
313
/* Copy context handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
	struct sk_buff	*skb;		/* packet whose data is being quoted */
	int		offset;		/* start offset within skb to copy from */
	uint8_t		type;		/* outgoing ICMPv6 message type */
};
319
/* ip6_append_data() getfrag callback: copy @len bytes of the original
 * packet into the outgoing frame while accumulating the checksum.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	/* For error messages, tie the reply to the original conntrack
	 * entry so NAT can translate it correctly.
	 */
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
333
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6 (RFC 6275): if the packet carried a Home Address
 * destination option, swap the care-of address in the source field with
 * the home address so the ICMP error is built against the home address.
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			swap(iph->saddr, hao->addr);
		}
	}
}
#else
/* No-op when Mobile IPv6 support is not built. */
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
353
/* Route an outgoing ICMPv6 error: plain route lookup followed by an
 * xfrm (IPsec) lookup; if policy forbids the forward direction
 * (-EPERM), retry with a reverse-decoded flow and XFRM_LOOKUP_ICMP so
 * errors can still traverse the SAs of the offending packet.
 * Returns a held dst or an ERR_PTR.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast unless we need to treat anycast as unicast.
	 */
	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
	    ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		/* xfrm_lookup consumed dst2 if it returned a new dst. */
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;	/* fall through to reverse lookup */
		else
			return dst;
	}

	/* Reverse-decode the offending packet's flow for the retry. */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* On relookup failure, fall back to the plain route if we kept one. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
418
/* Resolve the real input device for @skb, looking through loopback and
 * L3 master (VRF) devices via the attached route when necessary.
 */
static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		/* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
		 * and ip6_null_entry could be set to skb if no route is found.
		 */
		if (rt6 && rt6->rt6i_idev)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}
440
icmp6_iif(const struct sk_buff * skb)441 static int icmp6_iif(const struct sk_buff *skb)
442 {
443 return icmp6_dev(skb)->ifindex;
444 }
445
446 /*
447 * Send an ICMP message in response to a packet in error
448 */
icmp6_send(struct sk_buff * skb,u8 type,u8 code,__u32 info,const struct in6_addr * force_saddr,const struct inet6_skb_parm * parm)449 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
450 const struct in6_addr *force_saddr,
451 const struct inet6_skb_parm *parm)
452 {
453 struct inet6_dev *idev = NULL;
454 struct ipv6hdr *hdr = ipv6_hdr(skb);
455 struct sock *sk;
456 struct net *net;
457 struct ipv6_pinfo *np;
458 const struct in6_addr *saddr = NULL;
459 bool apply_ratelimit = false;
460 struct dst_entry *dst;
461 struct icmp6hdr tmp_hdr;
462 struct flowi6 fl6;
463 struct icmpv6_msg msg;
464 struct ipcm6_cookie ipc6;
465 int iif = 0;
466 int addr_type = 0;
467 int len;
468 u32 mark;
469
470 if ((u8 *)hdr < skb->head ||
471 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
472 return;
473
474 if (!skb->dev)
475 return;
476 net = dev_net(skb->dev);
477 mark = IP6_REPLY_MARK(net, skb->mark);
478 /*
479 * Make sure we respect the rules
480 * i.e. RFC 1885 2.4(e)
481 * Rule (e.1) is enforced by not using icmp6_send
482 * in any code that processes icmp errors.
483 */
484 addr_type = ipv6_addr_type(&hdr->daddr);
485
486 if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
487 ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
488 saddr = &hdr->daddr;
489
490 /*
491 * Dest addr check
492 */
493
494 if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
495 if (type != ICMPV6_PKT_TOOBIG &&
496 !(type == ICMPV6_PARAMPROB &&
497 code == ICMPV6_UNK_OPTION &&
498 (opt_unrec(skb, info))))
499 return;
500
501 saddr = NULL;
502 }
503
504 addr_type = ipv6_addr_type(&hdr->saddr);
505
506 /*
507 * Source addr check
508 */
509
510 if (__ipv6_addr_needs_scope_id(addr_type)) {
511 iif = icmp6_iif(skb);
512 } else {
513 /*
514 * The source device is used for looking up which routing table
515 * to use for sending an ICMP error.
516 */
517 iif = l3mdev_master_ifindex(skb->dev);
518 }
519
520 /*
521 * Must not send error if the source does not uniquely
522 * identify a single node (RFC2463 Section 2.4).
523 * We check unspecified / multicast addresses here,
524 * and anycast addresses will be checked later.
525 */
526 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
527 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
528 &hdr->saddr, &hdr->daddr);
529 return;
530 }
531
532 /*
533 * Never answer to a ICMP packet.
534 */
535 if (is_ineligible(skb)) {
536 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
537 &hdr->saddr, &hdr->daddr);
538 return;
539 }
540
541 /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
542 local_bh_disable();
543
544 /* Check global sysctl_icmp_msgs_per_sec ratelimit */
545 if (!(skb->dev->flags & IFF_LOOPBACK) &&
546 !icmpv6_global_allow(net, type, &apply_ratelimit))
547 goto out_bh_enable;
548
549 mip6_addr_swap(skb, parm);
550
551 sk = icmpv6_xmit_lock(net);
552 if (!sk)
553 goto out_bh_enable;
554
555 memset(&fl6, 0, sizeof(fl6));
556 fl6.flowi6_proto = IPPROTO_ICMPV6;
557 fl6.daddr = hdr->saddr;
558 if (force_saddr)
559 saddr = force_saddr;
560 if (saddr) {
561 fl6.saddr = *saddr;
562 } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
563 /* select a more meaningful saddr from input if */
564 struct net_device *in_netdev;
565
566 in_netdev = dev_get_by_index(net, parm->iif);
567 if (in_netdev) {
568 ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
569 inet6_sk(sk)->srcprefs,
570 &fl6.saddr);
571 dev_put(in_netdev);
572 }
573 }
574 fl6.flowi6_mark = mark;
575 fl6.flowi6_oif = iif;
576 fl6.fl6_icmp_type = type;
577 fl6.fl6_icmp_code = code;
578 fl6.flowi6_uid = sock_net_uid(net, NULL);
579 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
580 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
581
582 np = inet6_sk(sk);
583
584 if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
585 goto out;
586
587 tmp_hdr.icmp6_type = type;
588 tmp_hdr.icmp6_code = code;
589 tmp_hdr.icmp6_cksum = 0;
590 tmp_hdr.icmp6_pointer = htonl(info);
591
592 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
593 fl6.flowi6_oif = np->mcast_oif;
594 else if (!fl6.flowi6_oif)
595 fl6.flowi6_oif = np->ucast_oif;
596
597 ipcm6_init_sk(&ipc6, np);
598 ipc6.sockc.mark = mark;
599 fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
600
601 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
602 if (IS_ERR(dst))
603 goto out;
604
605 ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
606
607 msg.skb = skb;
608 msg.offset = skb_network_offset(skb);
609 msg.type = type;
610
611 len = skb->len - msg.offset;
612 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
613 if (len < 0) {
614 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
615 &hdr->saddr, &hdr->daddr);
616 goto out_dst_release;
617 }
618
619 rcu_read_lock();
620 idev = __in6_dev_get(skb->dev);
621
622 if (ip6_append_data(sk, icmpv6_getfrag, &msg,
623 len + sizeof(struct icmp6hdr),
624 sizeof(struct icmp6hdr),
625 &ipc6, &fl6, (struct rt6_info *)dst,
626 MSG_DONTWAIT)) {
627 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
628 ip6_flush_pending_frames(sk);
629 } else {
630 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
631 len + sizeof(struct icmp6hdr));
632 }
633 rcu_read_unlock();
634 out_dst_release:
635 dst_release(dst);
636 out:
637 icmpv6_xmit_unlock(sk);
638 out_bh_enable:
639 local_bh_enable();
640 }
641 EXPORT_SYMBOL(icmp6_send);
642
/* Slightly more convenient version of icmp6_send with drop reasons.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	/* Send the parameter problem, then consume the offending skb
	 * with the caller-supplied drop reason.
	 */
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}
651
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 *
 * Returns 0 when an ICMPv6 error was generated, 1 otherwise.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	/* Need the tunnel header, the inner IPv6 header and 8 bytes. */
	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* Need a private copy (not a clone) if we will shift data around. */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Source the error from the v4-mapped form of the outer source. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
717
/* Build and send an (extended) echo reply for the echo request in @skb.
 * Honors the echo_ignore_* and anycast sysctls and both global and
 * per-host rate limits. Returns the skb drop/consume reason.
 */
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	/* For multicast/anycast destinations, let source selection pick
	 * the reply source (unless anycast replies are explicitly allowed).
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	/* Reply header = request header with the type rewritten. */
	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	/* RFC 8335 PROBE: validate and build the probe reply payload. */
	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}
830
/* Deliver an incoming ICMPv6 error to the upper-layer protocol the
 * quoted packet belonged to (via its err_handler) and to matching raw
 * sockets. @skb points at the packet quoted inside the ICMP error.
 */
enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* Record any segment routing header info in the control block. */
	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	reason = pskb_may_pull_reason(skb, inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}
884
/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Only accept ICMP past a failed policy check when the
		 * last SA explicitly allows ICMP (XFRM_STATE_ICMP).
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header past the ICMP
		 * header so the reverse policy check sees the inner packet.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		/* RFC 8335 PROBE: additionally gated by the (shared)
		 * icmp_echo_enable_probe sysctl.
		 */
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_EXT_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		/* MLD code takes over ownership of the skb. */
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}
1044
/* Initialize @fl6 for an ICMPv6 flow of @type from @saddr to @daddr on
 * @oif, zeroing everything else, and run LSM flow classification.
 */
void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr, int oif)
{
	memset(fl6, 0, sizeof(*fl6));

	/* Protocol / interface first, then addresses, then ICMP keys. */
	fl6->flowi6_proto = IPPROTO_ICMPV6;
	fl6->flowi6_oif = oif;
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->fl6_icmp_type = type;
	fl6->fl6_icmp_code = 0;

	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}
1058
/* Boot-time init: create one ICMPv6 control socket per possible CPU and
 * register the protocol handler and the icmp sender hook.
 */
int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}
1096
/* Teardown counterpart of icmpv6_init(): unregister the sender hook
 * first, then the protocol handler (reverse of registration order).
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1102
1103
/* Map ICMPV6_DEST_UNREACH codes (used as the array index) to a socket
 * errno and whether the error is fatal to the connection.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1137
/* Translate an ICMPv6 error (@type/@code) into a socket errno stored in
 * *@err. Returns non-zero when the error is fatal to the connection.
 */
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	/* Default for unknown types: non-fatal protocol error. */
	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		} else {
			/* Unknown unreach code: treat as fatal EPROTO. */
			fatal = 1;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);
1170
1171 #ifdef CONFIG_SYSCTL
/* Template for the per-netns net.ipv6.icmp sysctl table; cloned and
 * re-pointed at each netns's fields in ipv6_icmp_sysctl_init().
 * NOTE: ipv6_icmp_sysctl_init() patches entries by index, so the order
 * here must be kept in sync with that function.
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{ },	/* sentinel */
};
1219
/* Duplicate the sysctl template for @net and point each entry at that
 * netns's fields. Indices must match the order of
 * ipv6_icmp_table_template above. Returns NULL on allocation failure.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
	}
	return table;
}
1238
/* Number of entries (including the sentinel) in the icmp sysctl table. */
size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}
1243 #endif
1244