// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to an icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to send parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:	change output process to use ip6_append_data
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
#include <net/l3mdev.h>

#include <linux/uaccess.h>

/*
 *	The ICMP socket(s). This is the most convenient way to flow control
 *	our ICMP output as well as maintain a clean interface throughout
 *	all layers. All Socketless IP sends will soon be gone.
 *
 *	On SMP we have one ICMP socket per-cpu.
 */
static struct sock *icmpv6_sk(struct net *net)
{
	return this_cpu_read(*net->ipv6.icmp_sk);
}
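/* Descriptive note: err_handler for the ICMPv6 protocol itself, called when
 * a received ICMPv6 error quotes an ICMPv6 packet (e.g. one of our echo
 * requests). It applies PMTU updates and redirects, and forwards errors
 * caused by an echo request to the matching ping socket.
 */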
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}

static int icmpv6_rcv(struct sk_buff *skb);

static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/* Called with BH disabled */
static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = icmpv6_sk(net);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	return sk;
}

static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock(&sk->sk_lock.slock);
}

/*
 *	Figure out whether we may reply to this packet with an ICMP error.
 *
 *	We do not reply if:
 *	- it was an ICMP error message.
 *	- it is truncated such that the protocol is known to be ICMPV6
 *	  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);
		if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}

static bool icmpv6_mask_allow(struct net *net, int type)
{
	if (type > ICMPV6_MSG_MAX)
		return true;

	/* Limit if icmp type is set in ratemask. */
	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
		return true;

	return false;
}

static bool icmpv6_global_allow(struct net *net, int type)
{
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow())
		return true;

	return false;
}

/*
 *	Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	if (icmpv6_mask_allow(net, type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
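		/* Worked example of the shift below: a /128 host route keeps
		 * the full icmpv6_time interval; a /64 route may emit four
		 * times as fast (tmo >> 2); a default route (plen 0) sixteen
		 * times as fast (tmo >> 4), since (128 - plen) >> 5 is used
		 * as the shift count.
		 */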
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	dst_release(dst);
	return res;
}

static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
				  struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	dst = ip6_route_output(net, sk, fl6);
	if (!dst->error) {
		struct rt6_info *rt = (struct rt6_info *)dst;
		struct in6_addr prefsrc;

		rt6_get_prefsrc(rt, &prefsrc);
		res = !ipv6_addr_any(&prefsrc);
	}
	dst_release(dst);
	return res;
}

/*
 *	An inline helper for the "simple" if statement below;
 *	checks whether a parameter problem report is caused by an
 *	unrecognized IPv6 option whose Option Type has its
 *	highest-order two bits set to 10.
 */

static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
	u8 _optval, *op;

	offset += skb_network_offset(skb);
	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
	if (!op)
		return true;
	return (*op & 0xC0) == 0x80;
}

void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}

struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
	uint8_t		type;
};

static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
static void mip6_addr_swap(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
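/* Descriptive note: route the outgoing error and pass the result through
 * IPsec policy. If the first xfrm_lookup() refuses the flow with -EPERM,
 * the session is decoded in the reverse direction from the offending
 * packet and looked up again with XFRM_LOOKUP_ICMP, so errors about
 * tunneled traffic can still travel back through the tunnel's policy.
 */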
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send an ICMP error if the destination is known to be
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}

static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		if (rt6)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}

static int icmp6_iif(const struct sk_buff *skb)
{
	return icmp6_dev(skb)->ifindex;
}

/*
 *	Send an ICMP message in response to a packet in error
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;
	net = dev_net(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
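	/*
	 * Summary of the checks below: no error is sent if the offending
	 * packet was addressed to a multicast group or was not unicast at
	 * the link layer (except for PKT_TOOBIG and the unrecognized-option
	 * parameter problem), if its source address does not uniquely
	 * identify a node, or if it was itself an ICMPv6 error (see
	 * is_ineligible() above).
	 */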
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to an ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
		goto out_bh_enable;

	mip6_addr_swap(skb);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from the input interface */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipcm6_init_sk(&ipc6, np);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -
		    sizeof(struct icmp6hdr));
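	/* The clamp above keeps the whole error message within the minimum
	 * IPv6 MTU (1280 bytes), per RFC 4443 2.4(c): quote as much of the
	 * offending packet as possible without exceeding that limit.
	 */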
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, (struct rt6_info *)dst,
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
EXPORT_SYMBOL(icmp6_send);

/* Slightly more convenient version of icmp6_send.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
	kfree_skb(skb);
}

/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *	Either an IPv4 header for SIT encap
 *	       an IPv4 header + GRE header for GRE encap
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr);
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr);
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
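/* Descriptive note: reply to an echo request unless a sysctl forbids it.
 * Replies to multicast-addressed requests are gated by
 * icmpv6_echo_ignore_multicast, replies to anycast destinations by
 * icmpv6_echo_ignore_anycast, and an anycast address is used as the
 * reply's source only when anycast_src_echo_reply permits it.
 */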
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	bool acast;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return;

	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = ICMPV6_ECHO_REPLY;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}

void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check header, including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not be able, e.g., to make source-routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */
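	/* Hand the error to the inner protocol's err_handler (e.g. TCP or
	 * UDP), then to any raw sockets bound to that protocol.
	 */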
	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}

/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. It seems only an "advanced"
		   destination cache will allow us to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}

void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
		      u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr,
		      int oif)
{
	memset(fl6, 0, sizeof(*fl6));
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}

static void __net_exit icmpv6_sk_exit(struct net *net)
{
	int i;

	for_each_possible_cpu(i)
		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
	free_percpu(net->ipv6.icmp_sk);
}

static int __net_init icmpv6_sk_init(struct net *net)
{
	struct sock *sk;
	int err, i;

	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
	if (!net->ipv6.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			goto fail;
		}

		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
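		/* SKB_TRUESIZE() inflates the raw 64 KiB payload figure by
		 * the sk_buff and skb_shared_info bookkeeping, so the send
		 * buffer can really hold two maximum-size ICMP packets.
		 */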
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}
	return 0;

fail:
	icmpv6_sk_exit(net);
	return err;
}

static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};

int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}

void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}


static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};

int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err  = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{ },
};
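/* Clone the template for each network namespace and point every entry's
 * .data at that namespace's own copy of the sysctl, so the knobs are
 * per-netns rather than global.
 */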
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
	}
	return table;
}
#endif