1 /* 2 * ip_vs_xmit.c: various packet transmitters for IPVS 3 * 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5 * Julian Anastasov <ja@ssi.bg> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Changes: 13 * 14 * Description of forwarding methods: 15 * - all transmitters are called from LOCAL_IN (remote clients) and 16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD 17 * - not all connections have destination server, for example, 18 * connections in backup server when fwmark is used 19 * - bypass connections use daddr from packet 20 * - we can use dst without ref while sending in RCU section, we use 21 * ref when returning NF_ACCEPT for NAT-ed packet via loopback 22 * LOCAL_OUT rules: 23 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) 24 * - skb->pkt_type is not set yet 25 * - the only place where we can see skb->sk != NULL 26 */ 27 28 #define KMSG_COMPONENT "IPVS" 29 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 30 31 #include <linux/kernel.h> 32 #include <linux/slab.h> 33 #include <linux/tcp.h> /* for tcphdr */ 34 #include <net/ip.h> 35 #include <net/tcp.h> /* for csum_tcpudp_magic */ 36 #include <net/udp.h> 37 #include <net/icmp.h> /* for icmp_send */ 38 #include <net/route.h> /* for ip_route_output */ 39 #include <net/ipv6.h> 40 #include <net/ip6_route.h> 41 #include <net/ip_tunnels.h> 42 #include <net/addrconf.h> 43 #include <linux/icmpv6.h> 44 #include <linux/netfilter.h> 45 #include <linux/netfilter_ipv4.h> 46 47 #include <net/ip_vs.h> 48 49 enum { 50 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */ 51 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */ 52 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to 53 * local 54 */ 55 IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ 56 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ 57 IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ 58 }; 59 60 static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) 61 { 62 return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); 63 } 64 65 static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) 66 { 67 kfree(dest_dst); 68 } 69 70 /* 71 * Destination cache to speed up outgoing route lookup 72 */ 73 static inline void 74 __ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, 75 struct dst_entry *dst, u32 dst_cookie) 76 { 77 struct ip_vs_dest_dst *old; 78 79 old = rcu_dereference_protected(dest->dest_dst, 80 lockdep_is_held(&dest->dst_lock)); 81 82 if (dest_dst) { 83 dest_dst->dst_cache = dst; 84 dest_dst->dst_cookie = dst_cookie; 85 } 86 rcu_assign_pointer(dest->dest_dst, dest_dst); 87 88 if (old) 89 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 90 } 91 92 static inline struct ip_vs_dest_dst * 93 __ip_vs_dst_check(struct ip_vs_dest *dest) 94 { 95 struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); 96 struct dst_entry *dst; 97 98 if (!dest_dst) 99 return NULL; 100 dst = dest_dst->dst_cache; 101 if (dst->obsolete && 102 dst->ops->check(dst, dest_dst->dst_cookie) == NULL) 103 return NULL; 104 return dest_dst; 105 } 106 107 static inline bool 108 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) 109 { 110 if (IP6CB(skb)->frag_max_size) { 111 /* frag_max_size tell us that, this packet have been 112 * defragmented by netfilter IPv6 conntrack module. 113 */ 114 if (IP6CB(skb)->frag_max_size > mtu) 115 return true; /* largest fragment violate MTU */ 116 } 117 else if (skb->len > mtu && !skb_is_gso(skb)) { 118 return true; /* Packet size violate MTU size */ 119 } 120 return false; 121 } 122 123 /* Get route to daddr, update *saddr, optionally bind route to saddr */ 124 static struct rtable *do_output_route4(struct net *net, __be32 daddr, 125 int rt_mode, __be32 *saddr) 126 { 127 struct flowi4 fl4; 128 struct rtable *rt; 129 int loop = 0; 130 131 memset(&fl4, 0, sizeof(fl4)); 132 fl4.daddr = daddr; 133 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; 134 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 135 FLOWI_FLAG_KNOWN_NH : 0; 136 137 retry: 138 rt = ip_route_output_key(net, &fl4); 139 if (IS_ERR(rt)) { 140 /* Invalid saddr ? */ 141 if (PTR_ERR(rt) == -EINVAL && *saddr && 142 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { 143 *saddr = 0; 144 flowi4_update_output(&fl4, 0, 0, daddr, 0); 145 goto retry; 146 } 147 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); 148 return NULL; 149 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { 150 ip_rt_put(rt); 151 *saddr = fl4.saddr; 152 flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); 153 loop++; 154 goto retry; 155 } 156 *saddr = fl4.saddr; 157 return rt; 158 } 159 160 #ifdef CONFIG_IP_VS_IPV6 161 static inline int __ip_vs_is_local_route6(struct rt6_info *rt) 162 { 163 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; 164 } 165 #endif 166 167 static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, 168 int rt_mode, 169 bool new_rt_is_local) 170 { 171 bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); 172 bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); 173 bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR); 174 bool source_is_loopback; 175 bool old_rt_is_local; 176 177 #ifdef CONFIG_IP_VS_IPV6 178 if (skb_af == AF_INET6) { 179 int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); 180 181 source_is_loopback = 182 (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 183 (addr_type & IPV6_ADDR_LOOPBACK); 184 old_rt_is_local = __ip_vs_is_local_route6( 185 (struct rt6_info *)skb_dst(skb)); 186 } else 187 #endif 188 { 189 source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr); 190 old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 191 } 192 193 if (unlikely(new_rt_is_local)) { 194 if (!rt_mode_allow_local) 195 return true; 196 if (!rt_mode_allow_redirect && !old_rt_is_local) 197 return true; 198 } else { 199 if (!rt_mode_allow_non_local) 200 return true; 201 if (source_is_loopback) 202 return true; 203 } 204 return false; 205 } 206 207 static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) 208 { 209 struct sock *sk = skb->sk; 210 struct rtable *ort = skb_rtable(skb); 211 212 if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) 213 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); 214 } 215 216 static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, 217 struct ip_vs_iphdr *ipvsh, 218 struct sk_buff *skb, int mtu) 219 { 220 #ifdef CONFIG_IP_VS_IPV6 221 if (skb_af == AF_INET6) { 222 struct net *net = dev_net(skb_dst(skb)->dev); 223 224 if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { 225 if (!skb->dev) 226 skb->dev = net->loopback_dev; 227 /* only send ICMP too big on first fragment */ 228 if (!ipvsh->fragoffs) 229 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 230 IP_VS_DBG(1, "frag needed for %pI6c\n", 231 &ipv6_hdr(skb)->saddr); 232 return false; 233 } 234 } else 235 #endif 236 { 237 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 238 239 /* If we're going to tunnel the packet and pmtu discovery 240 * is disabled, we'll just fragment it anyway 241 */ 242 if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs)) 243 return true; 244 245 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && 246 skb->len > mtu && !skb_is_gso(skb))) { 247 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 248 htonl(mtu)); 249 IP_VS_DBG(1, "frag needed for %pI4\n", 250 &ip_hdr(skb)->saddr); 251 return false; 252 } 253 } 254 255 return true; 256 } 257 258 /* Get route to destination or remote server */ 259 static int 260 __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, 261 __be32 daddr, int rt_mode, __be32 *ret_saddr, 262 struct ip_vs_iphdr *ipvsh) 263 { 264 struct net *net = dev_net(skb_dst(skb)->dev); 265 struct ip_vs_dest_dst *dest_dst; 266 struct rtable *rt; /* Route to the other host */ 267 int mtu; 268 int local, noref = 1; 269 270 if (dest) { 271 dest_dst = __ip_vs_dst_check(dest); 272 if (likely(dest_dst)) 273 rt = (struct rtable *) dest_dst->dst_cache; 274 else { 275 dest_dst = ip_vs_dest_dst_alloc(); 276 spin_lock_bh(&dest->dst_lock); 277 if (!dest_dst) { 278 __ip_vs_dst_set(dest, NULL, NULL, 0); 279 spin_unlock_bh(&dest->dst_lock); 280 goto err_unreach; 281 } 282 rt = do_output_route4(net, dest->addr.ip, rt_mode, 283 &dest_dst->dst_saddr.ip); 284 if (!rt) { 285 __ip_vs_dst_set(dest, NULL, NULL, 0); 286 spin_unlock_bh(&dest->dst_lock); 287 ip_vs_dest_dst_free(dest_dst); 288 goto err_unreach; 289 } 290 __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); 291 spin_unlock_bh(&dest->dst_lock); 292 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", 293 &dest->addr.ip, &dest_dst->dst_saddr.ip, 294 atomic_read(&rt->dst.__refcnt)); 295 } 296 if (ret_saddr) 297 *ret_saddr = dest_dst->dst_saddr.ip; 298 } else { 299 __be32 saddr = htonl(INADDR_ANY); 300 301 noref = 0; 302 303 /* For such unconfigured boxes avoid many route lookups 304 * for performance reasons because we do not remember saddr 305 */ 306 rt_mode &= ~IP_VS_RT_MODE_CONNECT; 307 rt = do_output_route4(net, daddr, rt_mode, &saddr); 308 if (!rt) 309 goto err_unreach; 310 if (ret_saddr) 311 *ret_saddr = saddr; 312 } 313 314 local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; 315 if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, 316 local))) { 317 IP_VS_DBG_RL("We are crossing local and non-local addresses" 318 " daddr=%pI4\n", &daddr); 319 goto err_put; 320 } 321 322 if (unlikely(local)) { 323 /* skb to local stack, preserve old route */ 324 if (!noref) 325 ip_rt_put(rt); 326 return local; 327 } 328 329 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { 330 mtu = dst_mtu(&rt->dst); 331 } else { 332 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 333 if (mtu < 68) { 334 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 335 goto err_put; 336 } 337 maybe_update_pmtu(skb_af, skb, mtu); 338 } 339 340 if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) 341 goto err_put; 342 343 skb_dst_drop(skb); 344 if (noref) { 345 if (!local) 346 skb_dst_set_noref(skb, &rt->dst); 347 else 348 skb_dst_set(skb, dst_clone(&rt->dst)); 349 } else 350 skb_dst_set(skb, &rt->dst); 351 352 return local; 353 354 err_put: 355 if (!noref) 356 ip_rt_put(rt); 357 return -1; 358 359 err_unreach: 360 dst_link_failure(skb); 361 return -1; 362 } 363 364 #ifdef CONFIG_IP_VS_IPV6 365 static struct dst_entry * 366 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, 367 struct in6_addr *ret_saddr, int do_xfrm) 368 { 369 struct dst_entry *dst; 370 struct flowi6 fl6 = { 371 .daddr = *daddr, 372 }; 373 374 dst = ip6_route_output(net, NULL, &fl6); 375 if (dst->error) 376 goto out_err; 377 if (!ret_saddr) 378 return dst; 379 if (ipv6_addr_any(&fl6.saddr) && 380 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, 381 &fl6.daddr, 0, &fl6.saddr) < 0) 382 goto out_err; 383 if (do_xfrm) { 384 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); 385 if (IS_ERR(dst)) { 386 dst = NULL; 387 goto out_err; 388 } 389 } 390 *ret_saddr = fl6.saddr; 391 return dst; 392 393 out_err: 394 dst_release(dst); 395 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); 396 return NULL; 397 } 398 399 /* 400 * Get route to destination or remote server 401 */ 402 static int 403 __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, 404 struct in6_addr *daddr, struct in6_addr *ret_saddr, 405 struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) 406 { 407 struct net *net = dev_net(skb_dst(skb)->dev); 408 struct ip_vs_dest_dst *dest_dst; 409 struct rt6_info *rt; /* Route to the other host */ 410 struct dst_entry *dst; 411 int mtu; 412 int local, noref = 1; 413 414 if (dest) { 415 dest_dst = __ip_vs_dst_check(dest); 416 if (likely(dest_dst)) 417 rt = (struct rt6_info *) dest_dst->dst_cache; 418 else { 419 u32 cookie; 420 421 dest_dst = ip_vs_dest_dst_alloc(); 422 spin_lock_bh(&dest->dst_lock); 423 if (!dest_dst) { 424 __ip_vs_dst_set(dest, NULL, NULL, 0); 425 spin_unlock_bh(&dest->dst_lock); 426 goto err_unreach; 427 } 428 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 429 &dest_dst->dst_saddr.in6, 430 do_xfrm); 431 if (!dst) { 432 __ip_vs_dst_set(dest, NULL, NULL, 0); 433 spin_unlock_bh(&dest->dst_lock); 434 ip_vs_dest_dst_free(dest_dst); 435 goto err_unreach; 436 } 437 rt = (struct rt6_info *) dst; 438 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 439 __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); 440 spin_unlock_bh(&dest->dst_lock); 441 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 442 &dest->addr.in6, &dest_dst->dst_saddr.in6, 443 atomic_read(&rt->dst.__refcnt)); 444 } 445 if (ret_saddr) 446 *ret_saddr = dest_dst->dst_saddr.in6; 447 } else { 448 noref = 0; 449 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); 450 if (!dst) 451 goto err_unreach; 452 rt = (struct rt6_info *) dst; 453 } 454 455 local = __ip_vs_is_local_route6(rt); 456 457 if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, 458 local))) { 459 IP_VS_DBG_RL("We are crossing local and non-local addresses" 460 " daddr=%pI6\n", daddr); 461 goto err_put; 462 } 463 464 if (unlikely(local)) { 465 /* skb to local stack, preserve old route */ 466 if (!noref) 467 dst_release(&rt->dst); 468 return local; 469 } 470 471 /* MTU checking */ 472 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) 473 mtu = dst_mtu(&rt->dst); 474 else { 475 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 476 if (mtu < IPV6_MIN_MTU) { 477 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, 478 IPV6_MIN_MTU); 479 goto err_put; 480 } 481 maybe_update_pmtu(skb_af, skb, mtu); 482 } 483 484 if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) 485 goto err_put; 486 487 skb_dst_drop(skb); 488 if (noref) { 489 if (!local) 490 skb_dst_set_noref(skb, &rt->dst); 491 else 492 skb_dst_set(skb, dst_clone(&rt->dst)); 493 } else 494 skb_dst_set(skb, &rt->dst); 495 496 return local; 497 498 err_put: 499 if (!noref) 500 dst_release(&rt->dst); 501 return -1; 502 503 err_unreach: 504 dst_link_failure(skb); 505 return -1; 506 } 507 #endif 508 509 510 /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ 511 static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, 512 struct ip_vs_conn *cp) 513 { 514 int ret = NF_ACCEPT; 515 516 skb->ipvs_property = 1; 517 if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) 518 ret = ip_vs_confirm_conntrack(skb); 519 if (ret == NF_ACCEPT) { 520 nf_reset(skb); 521 skb_forward_csum(skb); 522 } 523 return ret; 524 } 525 526 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 527 static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, 528 struct ip_vs_conn *cp, int local) 529 { 530 int ret = NF_STOLEN; 531 532 skb->ipvs_property = 1; 533 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 534 ip_vs_notrack(skb); 535 else 536 ip_vs_update_conntrack(skb, cp, 1); 537 if (!local) { 538 skb_forward_csum(skb); 539 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, 540 dst_output); 541 } else 542 ret = NF_ACCEPT; 543 return ret; 544 } 545 546 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 547 static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, 548 struct ip_vs_conn *cp, int local) 549 { 550 int ret = NF_STOLEN; 551 552 skb->ipvs_property = 1; 553 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 554 ip_vs_notrack(skb); 555 if (!local) { 556 skb_forward_csum(skb); 557 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, 558 dst_output); 559 } else 560 ret = NF_ACCEPT; 561 return ret; 562 } 563 564 565 /* 566 * NULL transmitter (do nothing except return NF_ACCEPT) 567 */ 568 int 569 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 570 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 571 { 572 /* we do not touch skb and do not need pskb ptr */ 573 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 574 } 575 576 577 /* 578 * Bypass transmitter 579 * Let packets bypass the destination when the destination is not 580 * available, it may be only used in transparent cache cluster. 581 */ 582 int 583 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 584 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 585 { 586 struct iphdr *iph = ip_hdr(skb); 587 588 EnterFunction(10); 589 590 rcu_read_lock(); 591 if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr, 592 IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) 593 goto tx_error; 594 595 ip_send_check(iph); 596 597 /* Another hack: avoid icmp_send in ip_fragment */ 598 skb->ignore_df = 1; 599 600 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); 601 rcu_read_unlock(); 602 603 LeaveFunction(10); 604 return NF_STOLEN; 605 606 tx_error: 607 kfree_skb(skb); 608 rcu_read_unlock(); 609 LeaveFunction(10); 610 return NF_STOLEN; 611 } 612 613 #ifdef CONFIG_IP_VS_IPV6 614 int 615 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 616 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 617 { 618 EnterFunction(10); 619 620 rcu_read_lock(); 621 if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL, 622 ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) 623 goto tx_error; 624 625 /* Another hack: avoid icmp_send in ip_fragment */ 626 skb->ignore_df = 1; 627 628 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); 629 rcu_read_unlock(); 630 631 LeaveFunction(10); 632 return NF_STOLEN; 633 634 tx_error: 635 kfree_skb(skb); 636 rcu_read_unlock(); 637 LeaveFunction(10); 638 return NF_STOLEN; 639 } 640 #endif 641 642 /* 643 * NAT transmitter (only for outside-to-inside nat forwarding) 644 * Not used for related ICMP 645 */ 646 int 647 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 648 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 649 { 650 struct rtable *rt; /* Route to the other host */ 651 int local, rc, was_input; 652 653 EnterFunction(10); 654 655 rcu_read_lock(); 656 /* check if it is a connection of no-client-port */ 657 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 658 __be16 _pt, *p; 659 660 p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); 661 if (p == NULL) 662 goto tx_error; 663 ip_vs_conn_fill_cport(cp, *p); 664 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 665 } 666 667 was_input = rt_is_input_route(skb_rtable(skb)); 668 local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, 669 IP_VS_RT_MODE_LOCAL | 670 IP_VS_RT_MODE_NON_LOCAL | 671 IP_VS_RT_MODE_RDR, NULL, ipvsh); 672 if (local < 0) 673 goto tx_error; 674 rt = skb_rtable(skb); 675 /* 676 * Avoid duplicate tuple in reply direction for NAT traffic 677 * to local address when connection is sync-ed 678 */ 679 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 680 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 681 enum ip_conntrack_info ctinfo; 682 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 683 684 if (ct && !nf_ct_is_untracked(ct)) { 685 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, 686 "ip_vs_nat_xmit(): " 687 "stopping DNAT to local address"); 688 goto tx_error; 689 } 690 } 691 #endif 692 693 /* From world but DNAT to loopback address? */ 694 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { 695 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 696 "stopping DNAT to loopback address"); 697 goto tx_error; 698 } 699 700 /* copy-on-write the packet before mangling it */ 701 if (!skb_make_writable(skb, sizeof(struct iphdr))) 702 goto tx_error; 703 704 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 705 goto tx_error; 706 707 /* mangle the packet */ 708 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) 709 goto tx_error; 710 ip_hdr(skb)->daddr = cp->daddr.ip; 711 ip_send_check(ip_hdr(skb)); 712 713 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); 714 715 /* FIXME: when application helper enlarges the packet and the length 716 is larger than the MTU of outgoing device, there will be still 717 MTU problem. */ 718 719 /* Another hack: avoid icmp_send in ip_fragment */ 720 skb->ignore_df = 1; 721 722 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); 723 rcu_read_unlock(); 724 725 LeaveFunction(10); 726 return rc; 727 728 tx_error: 729 kfree_skb(skb); 730 rcu_read_unlock(); 731 LeaveFunction(10); 732 return NF_STOLEN; 733 } 734 735 #ifdef CONFIG_IP_VS_IPV6 736 int 737 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 738 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 739 { 740 struct rt6_info *rt; /* Route to the other host */ 741 int local, rc; 742 743 EnterFunction(10); 744 745 rcu_read_lock(); 746 /* check if it is a connection of no-client-port */ 747 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { 748 __be16 _pt, *p; 749 p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); 750 if (p == NULL) 751 goto tx_error; 752 ip_vs_conn_fill_cport(cp, *p); 753 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 754 } 755 756 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 757 NULL, ipvsh, 0, 758 IP_VS_RT_MODE_LOCAL | 759 IP_VS_RT_MODE_NON_LOCAL | 760 IP_VS_RT_MODE_RDR); 761 if (local < 0) 762 goto tx_error; 763 rt = (struct rt6_info *) skb_dst(skb); 764 /* 765 * Avoid duplicate tuple in reply direction for NAT traffic 766 * to local address when connection is sync-ed 767 */ 768 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 769 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 770 enum ip_conntrack_info ctinfo; 771 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 772 773 if (ct && !nf_ct_is_untracked(ct)) { 774 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, 775 "ip_vs_nat_xmit_v6(): " 776 "stopping DNAT to local address"); 777 goto tx_error; 778 } 779 } 780 #endif 781 782 /* From world but DNAT to loopback address? */ 783 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 784 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { 785 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, 786 "ip_vs_nat_xmit_v6(): " 787 "stopping DNAT to loopback address"); 788 goto tx_error; 789 } 790 791 /* copy-on-write the packet before mangling it */ 792 if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) 793 goto tx_error; 794 795 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 796 goto tx_error; 797 798 /* mangle the packet */ 799 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) 800 goto tx_error; 801 ipv6_hdr(skb)->daddr = cp->daddr.in6; 802 803 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); 804 805 /* FIXME: when application helper enlarges the packet and the length 806 is larger than the MTU of outgoing device, there will be still 807 MTU problem. */ 808 809 /* Another hack: avoid icmp_send in ip_fragment */ 810 skb->ignore_df = 1; 811 812 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); 813 rcu_read_unlock(); 814 815 LeaveFunction(10); 816 return rc; 817 818 tx_error: 819 LeaveFunction(10); 820 kfree_skb(skb); 821 rcu_read_unlock(); 822 return NF_STOLEN; 823 } 824 #endif 825 826 /* When forwarding a packet, we must ensure that we've got enough headroom 827 * for the encapsulation packet in the skb. This also gives us an 828 * opportunity to figure out what the payload_len, dsfield, ttl, and df 829 * values should be, so that we won't need to look at the old ip header 830 * again 831 */ 832 static struct sk_buff * 833 ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, 834 unsigned int max_headroom, __u8 *next_protocol, 835 __u32 *payload_len, __u8 *dsfield, __u8 *ttl, 836 __be16 *df) 837 { 838 struct sk_buff *new_skb = NULL; 839 struct iphdr *old_iph = NULL; 840 #ifdef CONFIG_IP_VS_IPV6 841 struct ipv6hdr *old_ipv6h = NULL; 842 #endif 843 844 if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { 845 new_skb = skb_realloc_headroom(skb, max_headroom); 846 if (!new_skb) 847 goto error; 848 if (skb->sk) 849 skb_set_owner_w(new_skb, skb->sk); 850 consume_skb(skb); 851 skb = new_skb; 852 } 853 854 #ifdef CONFIG_IP_VS_IPV6 855 if (skb_af == AF_INET6) { 856 old_ipv6h = ipv6_hdr(skb); 857 *next_protocol = IPPROTO_IPV6; 858 if (payload_len) 859 *payload_len = 860 ntohs(old_ipv6h->payload_len) + 861 sizeof(*old_ipv6h); 862 *dsfield = ipv6_get_dsfield(old_ipv6h); 863 *ttl = old_ipv6h->hop_limit; 864 if (df) 865 *df = 0; 866 } else 867 #endif 868 { 869 old_iph = ip_hdr(skb); 870 /* Copy DF, reset fragment offset and MF */ 871 if (df) 872 *df = (old_iph->frag_off & htons(IP_DF)); 873 *next_protocol = IPPROTO_IPIP; 874 875 /* fix old IP header checksum */ 876 ip_send_check(old_iph); 877 *dsfield = ipv4_get_dsfield(old_iph); 878 *ttl = old_iph->ttl; 879 if (payload_len) 880 *payload_len = ntohs(old_iph->tot_len); 881 } 882 883 return skb; 884 error: 885 kfree_skb(skb); 886 return ERR_PTR(-ENOMEM); 887 } 888 889 static inline int __tun_gso_type_mask(int encaps_af, int orig_af) 890 { 891 if (encaps_af == AF_INET) { 892 if (orig_af == AF_INET) 893 return SKB_GSO_IPIP; 894 895 return SKB_GSO_SIT; 896 } 897 898 /* GSO: we need to provide proper SKB_GSO_ value for IPv6: 899 * SKB_GSO_SIT/IPV6 900 */ 901 return 0; 902 } 903 904 /* 905 * IP Tunneling transmitter 906 * 907 * This function encapsulates the packet in a new IP packet, its 908 * destination will be set to cp->daddr. Most code of this function 909 * is taken from ipip.c. 910 * 911 * It is used in VS/TUN cluster. The load balancer selects a real 912 * server from a cluster based on a scheduling algorithm, 913 * encapsulates the request packet and forwards it to the selected 914 * server. For example, all real servers are configured with 915 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives 916 * the encapsulated packet, it will decapsulate the packet, processe 917 * the request and return the response packets directly to the client 918 * without passing the load balancer. This can greatly increase the 919 * scalability of virtual server. 920 * 921 * Used for ANY protocol 922 */ 923 int 924 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 925 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 926 { 927 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 928 struct rtable *rt; /* Route to the other host */ 929 __be32 saddr; /* Source for tunnel */ 930 struct net_device *tdev; /* Device to other host */ 931 __u8 next_protocol = 0; 932 __u8 dsfield = 0; 933 __u8 ttl = 0; 934 __be16 df = 0; 935 __be16 *dfp = NULL; 936 struct iphdr *iph; /* Our new IP header */ 937 unsigned int max_headroom; /* The extra header space needed */ 938 int ret, local; 939 940 EnterFunction(10); 941 942 rcu_read_lock(); 943 local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, 944 IP_VS_RT_MODE_LOCAL | 945 IP_VS_RT_MODE_NON_LOCAL | 946 IP_VS_RT_MODE_CONNECT | 947 IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh); 948 if (local < 0) 949 goto tx_error; 950 if (local) { 951 rcu_read_unlock(); 952 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 953 } 954 955 rt = skb_rtable(skb); 956 tdev = rt->dst.dev; 957 958 /* 959 * Okay, now see if we can stuff it in the buffer as-is. 960 */ 961 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); 962 963 /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ 964 dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL; 965 skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, 966 &next_protocol, NULL, &dsfield, 967 &ttl, dfp); 968 if (IS_ERR(skb)) 969 goto tx_error; 970 971 skb = iptunnel_handle_offloads( 972 skb, false, __tun_gso_type_mask(AF_INET, cp->af)); 973 if (IS_ERR(skb)) 974 goto tx_error; 975 976 skb->transport_header = skb->network_header; 977 978 skb_push(skb, sizeof(struct iphdr)); 979 skb_reset_network_header(skb); 980 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 981 982 /* 983 * Push down and install the IPIP header. 984 */ 985 iph = ip_hdr(skb); 986 iph->version = 4; 987 iph->ihl = sizeof(struct iphdr)>>2; 988 iph->frag_off = df; 989 iph->protocol = next_protocol; 990 iph->tos = dsfield; 991 iph->daddr = cp->daddr.ip; 992 iph->saddr = saddr; 993 iph->ttl = ttl; 994 ip_select_ident(skb, NULL); 995 996 /* Another hack: avoid icmp_send in ip_fragment */ 997 skb->ignore_df = 1; 998 999 ret = ip_vs_tunnel_xmit_prepare(skb, cp); 1000 if (ret == NF_ACCEPT) 1001 ip_local_out(skb); 1002 else if (ret == NF_DROP) 1003 kfree_skb(skb); 1004 rcu_read_unlock(); 1005 1006 LeaveFunction(10); 1007 1008 return NF_STOLEN; 1009 1010 tx_error: 1011 if (!IS_ERR(skb)) 1012 kfree_skb(skb); 1013 rcu_read_unlock(); 1014 LeaveFunction(10); 1015 return NF_STOLEN; 1016 } 1017 1018 #ifdef CONFIG_IP_VS_IPV6 1019 int 1020 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1021 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1022 { 1023 struct rt6_info *rt; /* Route to the other host */ 1024 struct in6_addr saddr; /* Source for tunnel */ 1025 struct net_device *tdev; /* Device to other host */ 1026 __u8 next_protocol = 0; 1027 __u32 payload_len = 0; 1028 __u8 dsfield = 0; 1029 __u8 ttl = 0; 1030 struct ipv6hdr *iph; /* Our new IP header */ 1031 unsigned int max_headroom; /* The extra header space needed */ 1032 int ret, local; 1033 1034 EnterFunction(10); 1035 1036 rcu_read_lock(); 1037 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 1038 &saddr, ipvsh, 1, 1039 IP_VS_RT_MODE_LOCAL | 1040 IP_VS_RT_MODE_NON_LOCAL | 1041 IP_VS_RT_MODE_TUNNEL); 1042 if (local < 0) 1043 goto tx_error; 1044 if (local) { 1045 rcu_read_unlock(); 1046 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); 1047 } 1048 1049 rt = (struct rt6_info *) skb_dst(skb); 1050 tdev = rt->dst.dev; 1051 1052 /* 1053 * Okay, now see if we can stuff it in the buffer as-is. 1054 */ 1055 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); 1056 1057 skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, 1058 &next_protocol, &payload_len, 1059 &dsfield, &ttl, NULL); 1060 if (IS_ERR(skb)) 1061 goto tx_error; 1062 1063 skb = iptunnel_handle_offloads( 1064 skb, false, __tun_gso_type_mask(AF_INET6, cp->af)); 1065 if (IS_ERR(skb)) 1066 goto tx_error; 1067 1068 skb->transport_header = skb->network_header; 1069 1070 skb_push(skb, sizeof(struct ipv6hdr)); 1071 skb_reset_network_header(skb); 1072 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1073 1074 /* 1075 * Push down and install the IPIP header. 1076 */ 1077 iph = ipv6_hdr(skb); 1078 iph->version = 6; 1079 iph->nexthdr = next_protocol; 1080 iph->payload_len = htons(payload_len); 1081 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); 1082 ipv6_change_dsfield(iph, 0, dsfield); 1083 iph->daddr = cp->daddr.in6; 1084 iph->saddr = saddr; 1085 iph->hop_limit = ttl; 1086 1087 /* Another hack: avoid icmp_send in ip_fragment */ 1088 skb->ignore_df = 1; 1089 1090 ret = ip_vs_tunnel_xmit_prepare(skb, cp); 1091 if (ret == NF_ACCEPT) 1092 ip6_local_out(skb); 1093 else if (ret == NF_DROP) 1094 kfree_skb(skb); 1095 rcu_read_unlock(); 1096 1097 LeaveFunction(10); 1098 1099 return NF_STOLEN; 1100 1101 tx_error: 1102 if (!IS_ERR(skb)) 1103 kfree_skb(skb); 1104 rcu_read_unlock(); 1105 LeaveFunction(10); 1106 return NF_STOLEN; 1107 } 1108 #endif 1109 1110 1111 /* 1112 * Direct Routing transmitter 1113 * Used for ANY protocol 1114 */ 1115 int 1116 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1117 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1118 { 1119 int local; 1120 1121 EnterFunction(10); 1122 1123 rcu_read_lock(); 1124 local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, 1125 IP_VS_RT_MODE_LOCAL | 1126 IP_VS_RT_MODE_NON_LOCAL | 1127 IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); 1128 if (local < 0) 1129 goto tx_error; 1130 if (local) { 1131 rcu_read_unlock(); 1132 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 1133 } 1134 1135 ip_send_check(ip_hdr(skb)); 1136 1137 /* Another hack: avoid icmp_send in ip_fragment */ 1138 skb->ignore_df = 1; 1139 1140 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); 1141 rcu_read_unlock(); 1142 1143 LeaveFunction(10); 1144 return NF_STOLEN; 1145 1146 tx_error: 1147 kfree_skb(skb); 1148 rcu_read_unlock(); 1149 LeaveFunction(10); 1150 return NF_STOLEN; 1151 } 1152 1153 #ifdef CONFIG_IP_VS_IPV6 1154 int 1155 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1156 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1157 { 1158 int local; 1159 1160 EnterFunction(10); 1161 1162 rcu_read_lock(); 1163 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 1164 NULL, ipvsh, 0, 1165 IP_VS_RT_MODE_LOCAL | 1166 IP_VS_RT_MODE_NON_LOCAL); 1167 if (local < 0) 1168 goto tx_error; 1169 if (local) { 1170 rcu_read_unlock(); 1171 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); 1172 } 1173 1174 /* Another hack: avoid icmp_send in ip_fragment */ 1175 skb->ignore_df = 1; 1176 1177 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); 1178 rcu_read_unlock(); 1179 1180 LeaveFunction(10); 1181 return NF_STOLEN; 1182 1183 tx_error: 1184 kfree_skb(skb); 1185 rcu_read_unlock(); 1186 LeaveFunction(10); 1187 return NF_STOLEN; 1188 } 1189 #endif 1190 1191 1192 /* 1193 * ICMP packet transmitter 1194 * called by the ip_vs_in_icmp 1195 */ 1196 int 1197 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1198 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1199 struct ip_vs_iphdr *iph) 1200 { 1201 struct rtable *rt; /* Route to the other host */ 1202 int rc; 1203 int local; 1204 int rt_mode, was_input; 1205 1206 EnterFunction(10); 1207 1208 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1209 forwarded directly here, because there is no need to 1210 translate address/port back */ 1211 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1212 if (cp->packet_xmit) 1213 rc = cp->packet_xmit(skb, cp, pp, iph); 1214 else 1215 rc = NF_ACCEPT; 1216 /* do not touch skb anymore */ 1217 atomic_inc(&cp->in_pkts); 1218 goto out; 1219 } 1220 1221 /* 1222 * mangle and send the packet here (only for VS/NAT) 1223 */ 1224 was_input = rt_is_input_route(skb_rtable(skb)); 1225 1226 /* LOCALNODE from FORWARD hook is not supported */ 1227 rt_mode = (hooknum != NF_INET_FORWARD) ? 1228 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1229 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1230 rcu_read_lock(); 1231 local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, 1232 NULL, iph); 1233 if (local < 0) 1234 goto tx_error; 1235 rt = skb_rtable(skb); 1236 1237 /* 1238 * Avoid duplicate tuple in reply direction for NAT traffic 1239 * to local address when connection is sync-ed 1240 */ 1241 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1242 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1243 enum ip_conntrack_info ctinfo; 1244 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 1245 1246 if (ct && !nf_ct_is_untracked(ct)) { 1247 IP_VS_DBG(10, "%s(): " 1248 "stopping DNAT to local address %pI4\n", 1249 __func__, &cp->daddr.ip); 1250 goto tx_error; 1251 } 1252 } 1253 #endif 1254 1255 /* From world but DNAT to loopback address? */ 1256 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { 1257 IP_VS_DBG(1, "%s(): " 1258 "stopping DNAT to loopback %pI4\n", 1259 __func__, &cp->daddr.ip); 1260 goto tx_error; 1261 } 1262 1263 /* copy-on-write the packet before mangling it */ 1264 if (!skb_make_writable(skb, offset)) 1265 goto tx_error; 1266 1267 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1268 goto tx_error; 1269 1270 ip_vs_nat_icmp(skb, pp, cp, 0); 1271 1272 /* Another hack: avoid icmp_send in ip_fragment */ 1273 skb->ignore_df = 1; 1274 1275 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); 1276 rcu_read_unlock(); 1277 goto out; 1278 1279 tx_error: 1280 kfree_skb(skb); 1281 rcu_read_unlock(); 1282 rc = NF_STOLEN; 1283 out: 1284 LeaveFunction(10); 1285 return rc; 1286 } 1287 1288 #ifdef CONFIG_IP_VS_IPV6 1289 int 1290 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1291 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1292 struct ip_vs_iphdr *ipvsh) 1293 { 1294 struct rt6_info *rt; /* Route to the other host */ 1295 int rc; 1296 int local; 1297 int rt_mode; 1298 1299 EnterFunction(10); 1300 1301 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1302 forwarded directly here, because there is no need to 1303 translate address/port back */ 1304 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1305 if (cp->packet_xmit) 1306 rc = cp->packet_xmit(skb, cp, pp, ipvsh); 1307 else 1308 rc = NF_ACCEPT; 1309 /* do not touch skb anymore */ 1310 atomic_inc(&cp->in_pkts); 1311 goto out; 1312 } 1313 1314 /* 1315 * mangle and send the packet here (only for VS/NAT) 1316 */ 1317 1318 /* LOCALNODE from FORWARD hook is not supported */ 1319 rt_mode = (hooknum != NF_INET_FORWARD) ? 1320 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1321 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1322 rcu_read_lock(); 1323 local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, 1324 NULL, ipvsh, 0, rt_mode); 1325 if (local < 0) 1326 goto tx_error; 1327 rt = (struct rt6_info *) skb_dst(skb); 1328 /* 1329 * Avoid duplicate tuple in reply direction for NAT traffic 1330 * to local address when connection is sync-ed 1331 */ 1332 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1333 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1334 enum ip_conntrack_info ctinfo; 1335 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 1336 1337 if (ct && !nf_ct_is_untracked(ct)) { 1338 IP_VS_DBG(10, "%s(): " 1339 "stopping DNAT to local address %pI6\n", 1340 __func__, &cp->daddr.in6); 1341 goto tx_error; 1342 } 1343 } 1344 #endif 1345 1346 /* From world but DNAT to loopback address? */ 1347 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 1348 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { 1349 IP_VS_DBG(1, "%s(): " 1350 "stopping DNAT to loopback %pI6\n", 1351 __func__, &cp->daddr.in6); 1352 goto tx_error; 1353 } 1354 1355 /* copy-on-write the packet before mangling it */ 1356 if (!skb_make_writable(skb, offset)) 1357 goto tx_error; 1358 1359 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1360 goto tx_error; 1361 1362 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1363 1364 /* Another hack: avoid icmp_send in ip_fragment */ 1365 skb->ignore_df = 1; 1366 1367 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); 1368 rcu_read_unlock(); 1369 goto out; 1370 1371 tx_error: 1372 kfree_skb(skb); 1373 rcu_read_unlock(); 1374 rc = NF_STOLEN; 1375 out: 1376 LeaveFunction(10); 1377 return rc; 1378 } 1379 #endif 1380