// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen			add rate limits. never reply to a icmp.
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure to sent parameter problem for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/seg6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
#include <net/l3mdev.h>

#include <linux/uaccess.h>

/* Per-CPU raw socket used to transmit locally generated ICMPv6 messages.
 * Created once per possible CPU in icmpv6_init() against &init_net; its
 * netns is temporarily re-pointed in icmpv6_xmit_lock().
 */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);

/* Error handler invoked when an ICMPv6 error arrives whose embedded packet
 * was itself ICMPv6 (e.g. an error triggered by a ping we sent).  Updates
 * PMTU / redirect state and forwards echo-related errors to the ping code.
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* Only error messages (high bit of type clear) are relayed to ping */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}

static int icmpv6_rcv(struct sk_buff *skb);

static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/* Called with BH disabled */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	sock_net_set(sk, net);
	return sk;
}

/* Counterpart of icmpv6_xmit_lock(): restore the socket's netns to
 * init_net before dropping the slock.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}

/*
 * Figure out, may we reply to this packet with icmp error.
 *
 * We do not reply, if:
 *	- it was icmp error message.
 *	- it is truncated, so that it is known, that protocol is ICMPV6
 *	  (i.e. in the middle of some exthdr)
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}

/* Returns true when @type is exempt from rate limiting, either because it
 * is out of the ratemask's range or not selected in the sysctl ratemask.
 */
static bool icmpv6_mask_allow(struct net *net, int type)
{
	if (type > ICMPV6_MSG_MAX)
		return true;

	/* Limit if icmp type is set in ratemask. */
	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
		return true;

	return false;
}

/* Global (not per-destination) token-bucket rate limit check; bumps the
 * RATELIMITGLOBAL MIB counter on rejection.
 */
static bool icmpv6_global_allow(struct net *net, int type)
{
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow())
		return true;

	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
	return false;
}

/*
 *	Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	if (icmpv6_mask_allow(net, type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited */
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	if (!res)
		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
				  ICMP6_MIB_RATELIMITHOST);
	dst_release(dst);
	return res;
}

/* Returns true if the route towards fl6->daddr carries an explicit
 * preferred source address (RTA_PREFSRC); used to decide whether
 * icmp6_send() needs to pick a source address itself.
 */
static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
				  struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	dst = ip6_route_output(net, sk, fl6);
	if (!dst->error) {
		struct rt6_info *rt = (struct rt6_info *)dst;
		struct in6_addr prefsrc;

		rt6_get_prefsrc(rt, &prefsrc);
		res = !ipv6_addr_any(&prefsrc);
	}
	dst_release(dst);
	return res;
}

/*
 *	an inline helper for the "simple" if statement below
 *	checks if parameter problem report is caused by an
 *	unrecognized IPv6 option that has the Option Type
 *	highest-order two bits set to 10
 */

static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
	u8 _optval, *op;

	offset += skb_network_offset(skb);
	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
	if (!op)
		return true;
	return (*op & 0xC0) == 0x80;
}

/* Finalize and transmit the ICMPv6 message queued on sk's write queue:
 * copy in the header template @thdr, compute the checksum over the whole
 * message (single skb fast path, or summing per-skb csums when the
 * message was fragmented by ip6_append_data), then push the frames out.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}

/* Cursor over the offending packet: the payload copied into an outgoing
 * ICMPv6 message by icmpv6_getfrag().
 */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet being quoted */
	int		offset;	/* byte offset into skb to start copying */
	uint8_t		type;	/* ICMPv6 type of the message being built */
};

/* getfrag callback for ip6_append_data(): copies @len bytes of the quoted
 * packet into the new skb while accumulating the checksum, and attaches
 * conntrack state for error (non-informational) messages.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: swap the source address with the Home Address option so
 * that the ICMP error is generated against the mobile node's home address.
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif

/* Route lookup for an outgoing ICMPv6 error, including the reverse-flow
 * xfrm (IPsec) relookup dance: if the forward policy lookup is denied
 * with -EPERM, retry with the flow decoded from the offending packet in
 * reverse, per the ICMP handling rules of the xfrm layer.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Note: also reached on success of the relookup path above;
	 * a valid dst (if any) always wins over the error code.
	 */
	if (dst)
		return dst;
	return ERR_PTR(err);
}

static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		if (rt6)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}

/* Interface index to use when replying to @skb; see icmp6_dev(). */
static int icmp6_iif(const struct sk_buff *skb)
{
	return icmp6_dev(skb)->ifindex;
}

/*
 *	Send an ICMP message in response to a packet in error
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity-check that the IPv6 header of the offending packet is
	 * fully inside the skb before dereferencing it.
	 */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;
	net = dev_net(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	/* Per-destination (peer) rate limit */
	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipcm6_init_sk(&ipc6, np);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Quote as much of the offending packet as fits in the minimum MTU */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, (struct rt6_info *)dst,
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
EXPORT_SYMBOL(icmp6_send);

/* Slightly more convenient version of icmp6_send with drop reasons.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}

/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

/* Build and transmit an (extended) echo reply for an incoming (extended)
 * echo request, honouring the echo_ignore_* and anycast sysctls and the
 * global/per-destination rate limits.
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return;

	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	/* RFC 8335 PROBE: validate/populate the extended echo payload */
	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}

/* Deliver a received ICMPv6 error to the upper-layer protocol named by
 * the embedded packet's (post-exthdr) next header, and to matching raw
 * sockets.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}

/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header past the ICMP header
		 * so the reverse policy check sees the embedded packet.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_EXT_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}

/* Initialize an ICMPv6 flow for callers (e.g. ndisc) that build their
 * own messages; code is always 0 here.
 */
void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
		      u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr,
		      int oif)
{
	memset(fl6, 0, sizeof(*fl6));
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}

int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}

void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}


/* errno / fatality mapping for ICMPV6_DEST_UNREACH codes, indexed by code */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};

/* Translate an ICMPv6 type/code into an errno (*err) for socket callers.
 * Returns non-zero when the error is fatal to the connection.
 */
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err  = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{ },
};

/* Clone the sysctl template for a new netns, re-pointing each entry's
 * .data at the per-netns storage.  NOTE: the table[N] indices must stay
 * in sync with the template order above.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
	}
	return table;
}
#endif