1 /* 2 * ip_vs_xmit.c: various packet transmitters for IPVS 3 * 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5 * Julian Anastasov <ja@ssi.bg> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Changes: 13 * 14 * Description of forwarding methods: 15 * - all transmitters are called from LOCAL_IN (remote clients) and 16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD 17 * - not all connections have destination server, for example, 18 * connections in backup server when fwmark is used 19 * - bypass connections use daddr from packet 20 * - we can use dst without ref while sending in RCU section, we use 21 * ref when returning NF_ACCEPT for NAT-ed packet via loopback 22 * LOCAL_OUT rules: 23 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) 24 * - skb->pkt_type is not set yet 25 * - the only place where we can see skb->sk != NULL 26 */ 27 28 #define KMSG_COMPONENT "IPVS" 29 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 30 31 #include <linux/kernel.h> 32 #include <linux/slab.h> 33 #include <linux/tcp.h> /* for tcphdr */ 34 #include <net/ip.h> 35 #include <net/tcp.h> /* for csum_tcpudp_magic */ 36 #include <net/udp.h> 37 #include <net/icmp.h> /* for icmp_send */ 38 #include <net/route.h> /* for ip_route_output */ 39 #include <net/ipv6.h> 40 #include <net/ip6_route.h> 41 #include <net/ip_tunnels.h> 42 #include <net/addrconf.h> 43 #include <linux/icmpv6.h> 44 #include <linux/netfilter.h> 45 #include <linux/netfilter_ipv4.h> 46 47 #include <net/ip_vs.h> 48 49 enum { 50 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */ 51 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */ 52 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to 53 * local 54 */ 55 IP_VS_RT_MODE_CONNECT = 8, /* Always bind 
route to saddr */ 56 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ 57 IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ 58 }; 59 60 static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) 61 { 62 return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); 63 } 64 65 static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) 66 { 67 kfree(dest_dst); 68 } 69 70 /* 71 * Destination cache to speed up outgoing route lookup 72 */ 73 static inline void 74 __ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, 75 struct dst_entry *dst, u32 dst_cookie) 76 { 77 struct ip_vs_dest_dst *old; 78 79 old = rcu_dereference_protected(dest->dest_dst, 80 lockdep_is_held(&dest->dst_lock)); 81 82 if (dest_dst) { 83 dest_dst->dst_cache = dst; 84 dest_dst->dst_cookie = dst_cookie; 85 } 86 rcu_assign_pointer(dest->dest_dst, dest_dst); 87 88 if (old) 89 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 90 } 91 92 static inline struct ip_vs_dest_dst * 93 __ip_vs_dst_check(struct ip_vs_dest *dest) 94 { 95 struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); 96 struct dst_entry *dst; 97 98 if (!dest_dst) 99 return NULL; 100 dst = dest_dst->dst_cache; 101 if (dst->obsolete && 102 dst->ops->check(dst, dest_dst->dst_cookie) == NULL) 103 return NULL; 104 return dest_dst; 105 } 106 107 static inline bool 108 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) 109 { 110 if (IP6CB(skb)->frag_max_size) { 111 /* frag_max_size tell us that, this packet have been 112 * defragmented by netfilter IPv6 conntrack module. 
113 */ 114 if (IP6CB(skb)->frag_max_size > mtu) 115 return true; /* largest fragment violate MTU */ 116 } 117 else if (skb->len > mtu && !skb_is_gso(skb)) { 118 return true; /* Packet size violate MTU size */ 119 } 120 return false; 121 } 122 123 /* Get route to daddr, update *saddr, optionally bind route to saddr */ 124 static struct rtable *do_output_route4(struct net *net, __be32 daddr, 125 int rt_mode, __be32 *saddr) 126 { 127 struct flowi4 fl4; 128 struct rtable *rt; 129 int loop = 0; 130 131 memset(&fl4, 0, sizeof(fl4)); 132 fl4.daddr = daddr; 133 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 134 FLOWI_FLAG_KNOWN_NH : 0; 135 136 retry: 137 rt = ip_route_output_key(net, &fl4); 138 if (IS_ERR(rt)) { 139 /* Invalid saddr ? */ 140 if (PTR_ERR(rt) == -EINVAL && *saddr && 141 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { 142 *saddr = 0; 143 flowi4_update_output(&fl4, 0, 0, daddr, 0); 144 goto retry; 145 } 146 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); 147 return NULL; 148 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { 149 ip_rt_put(rt); 150 *saddr = fl4.saddr; 151 flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); 152 loop++; 153 goto retry; 154 } 155 *saddr = fl4.saddr; 156 return rt; 157 } 158 159 #ifdef CONFIG_IP_VS_IPV6 160 static inline int __ip_vs_is_local_route6(struct rt6_info *rt) 161 { 162 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; 163 } 164 #endif 165 166 static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, 167 int rt_mode, 168 bool new_rt_is_local) 169 { 170 bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); 171 bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); 172 bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR); 173 bool source_is_loopback; 174 bool old_rt_is_local; 175 176 #ifdef CONFIG_IP_VS_IPV6 177 if (skb_af == AF_INET6) { 178 int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); 179 180 source_is_loopback = 181 
(!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 182 (addr_type & IPV6_ADDR_LOOPBACK); 183 old_rt_is_local = __ip_vs_is_local_route6( 184 (struct rt6_info *)skb_dst(skb)); 185 } else 186 #endif 187 { 188 source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr); 189 old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 190 } 191 192 if (unlikely(new_rt_is_local)) { 193 if (!rt_mode_allow_local) 194 return true; 195 if (!rt_mode_allow_redirect && !old_rt_is_local) 196 return true; 197 } else { 198 if (!rt_mode_allow_non_local) 199 return true; 200 if (source_is_loopback) 201 return true; 202 } 203 return false; 204 } 205 206 static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) 207 { 208 struct sock *sk = skb->sk; 209 struct rtable *ort = skb_rtable(skb); 210 211 if (!skb->dev && sk && sk_fullsock(sk)) 212 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); 213 } 214 215 static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, 216 int rt_mode, 217 struct ip_vs_iphdr *ipvsh, 218 struct sk_buff *skb, int mtu) 219 { 220 #ifdef CONFIG_IP_VS_IPV6 221 if (skb_af == AF_INET6) { 222 struct net *net = ipvs->net; 223 224 if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { 225 if (!skb->dev) 226 skb->dev = net->loopback_dev; 227 /* only send ICMP too big on first fragment */ 228 if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh)) 229 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 230 IP_VS_DBG(1, "frag needed for %pI6c\n", 231 &ipv6_hdr(skb)->saddr); 232 return false; 233 } 234 } else 235 #endif 236 { 237 /* If we're going to tunnel the packet and pmtu discovery 238 * is disabled, we'll just fragment it anyway 239 */ 240 if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs)) 241 return true; 242 243 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && 244 skb->len > mtu && !skb_is_gso(skb) && 245 !ip_vs_iph_icmp(ipvsh))) { 246 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 247 htonl(mtu)); 248 IP_VS_DBG(1, "frag needed 
for %pI4\n", 249 &ip_hdr(skb)->saddr); 250 return false; 251 } 252 } 253 254 return true; 255 } 256 257 /* Get route to destination or remote server */ 258 static int 259 __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, 260 struct ip_vs_dest *dest, 261 __be32 daddr, int rt_mode, __be32 *ret_saddr, 262 struct ip_vs_iphdr *ipvsh) 263 { 264 struct net *net = ipvs->net; 265 struct ip_vs_dest_dst *dest_dst; 266 struct rtable *rt; /* Route to the other host */ 267 int mtu; 268 int local, noref = 1; 269 270 if (dest) { 271 dest_dst = __ip_vs_dst_check(dest); 272 if (likely(dest_dst)) 273 rt = (struct rtable *) dest_dst->dst_cache; 274 else { 275 dest_dst = ip_vs_dest_dst_alloc(); 276 spin_lock_bh(&dest->dst_lock); 277 if (!dest_dst) { 278 __ip_vs_dst_set(dest, NULL, NULL, 0); 279 spin_unlock_bh(&dest->dst_lock); 280 goto err_unreach; 281 } 282 rt = do_output_route4(net, dest->addr.ip, rt_mode, 283 &dest_dst->dst_saddr.ip); 284 if (!rt) { 285 __ip_vs_dst_set(dest, NULL, NULL, 0); 286 spin_unlock_bh(&dest->dst_lock); 287 ip_vs_dest_dst_free(dest_dst); 288 goto err_unreach; 289 } 290 __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); 291 spin_unlock_bh(&dest->dst_lock); 292 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", 293 &dest->addr.ip, &dest_dst->dst_saddr.ip, 294 atomic_read(&rt->dst.__refcnt)); 295 } 296 if (ret_saddr) 297 *ret_saddr = dest_dst->dst_saddr.ip; 298 } else { 299 __be32 saddr = htonl(INADDR_ANY); 300 301 noref = 0; 302 303 /* For such unconfigured boxes avoid many route lookups 304 * for performance reasons because we do not remember saddr 305 */ 306 rt_mode &= ~IP_VS_RT_MODE_CONNECT; 307 rt = do_output_route4(net, daddr, rt_mode, &saddr); 308 if (!rt) 309 goto err_unreach; 310 if (ret_saddr) 311 *ret_saddr = saddr; 312 } 313 314 local = (rt->rt_flags & RTCF_LOCAL) ? 
1 : 0; 315 if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, 316 local))) { 317 IP_VS_DBG_RL("We are crossing local and non-local addresses" 318 " daddr=%pI4\n", &daddr); 319 goto err_put; 320 } 321 322 if (unlikely(local)) { 323 /* skb to local stack, preserve old route */ 324 if (!noref) 325 ip_rt_put(rt); 326 return local; 327 } 328 329 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { 330 mtu = dst_mtu(&rt->dst); 331 } else { 332 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 333 if (mtu < 68) { 334 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 335 goto err_put; 336 } 337 maybe_update_pmtu(skb_af, skb, mtu); 338 } 339 340 if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) 341 goto err_put; 342 343 skb_dst_drop(skb); 344 if (noref) { 345 if (!local) 346 skb_dst_set_noref(skb, &rt->dst); 347 else 348 skb_dst_set(skb, dst_clone(&rt->dst)); 349 } else 350 skb_dst_set(skb, &rt->dst); 351 352 return local; 353 354 err_put: 355 if (!noref) 356 ip_rt_put(rt); 357 return -1; 358 359 err_unreach: 360 dst_link_failure(skb); 361 return -1; 362 } 363 364 #ifdef CONFIG_IP_VS_IPV6 365 static struct dst_entry * 366 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, 367 struct in6_addr *ret_saddr, int do_xfrm, int rt_mode) 368 { 369 struct dst_entry *dst; 370 struct flowi6 fl6 = { 371 .daddr = *daddr, 372 }; 373 374 if (rt_mode & IP_VS_RT_MODE_KNOWN_NH) 375 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; 376 377 dst = ip6_route_output(net, NULL, &fl6); 378 if (dst->error) 379 goto out_err; 380 if (!ret_saddr) 381 return dst; 382 if (ipv6_addr_any(&fl6.saddr) && 383 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, 384 &fl6.daddr, 0, &fl6.saddr) < 0) 385 goto out_err; 386 if (do_xfrm) { 387 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); 388 if (IS_ERR(dst)) { 389 dst = NULL; 390 goto out_err; 391 } 392 } 393 *ret_saddr = fl6.saddr; 394 return dst; 395 396 out_err: 397 dst_release(dst); 398 IP_VS_DBG_RL("ip6_route_output 
error, dest: %pI6\n", daddr); 399 return NULL; 400 } 401 402 /* 403 * Get route to destination or remote server 404 */ 405 static int 406 __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, 407 struct ip_vs_dest *dest, 408 struct in6_addr *daddr, struct in6_addr *ret_saddr, 409 struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) 410 { 411 struct net *net = ipvs->net; 412 struct ip_vs_dest_dst *dest_dst; 413 struct rt6_info *rt; /* Route to the other host */ 414 struct dst_entry *dst; 415 int mtu; 416 int local, noref = 1; 417 418 if (dest) { 419 dest_dst = __ip_vs_dst_check(dest); 420 if (likely(dest_dst)) 421 rt = (struct rt6_info *) dest_dst->dst_cache; 422 else { 423 u32 cookie; 424 425 dest_dst = ip_vs_dest_dst_alloc(); 426 spin_lock_bh(&dest->dst_lock); 427 if (!dest_dst) { 428 __ip_vs_dst_set(dest, NULL, NULL, 0); 429 spin_unlock_bh(&dest->dst_lock); 430 goto err_unreach; 431 } 432 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 433 &dest_dst->dst_saddr.in6, 434 do_xfrm, rt_mode); 435 if (!dst) { 436 __ip_vs_dst_set(dest, NULL, NULL, 0); 437 spin_unlock_bh(&dest->dst_lock); 438 ip_vs_dest_dst_free(dest_dst); 439 goto err_unreach; 440 } 441 rt = (struct rt6_info *) dst; 442 cookie = rt6_get_cookie(rt); 443 __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); 444 spin_unlock_bh(&dest->dst_lock); 445 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 446 &dest->addr.in6, &dest_dst->dst_saddr.in6, 447 atomic_read(&rt->dst.__refcnt)); 448 } 449 if (ret_saddr) 450 *ret_saddr = dest_dst->dst_saddr.in6; 451 } else { 452 noref = 0; 453 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm, 454 rt_mode); 455 if (!dst) 456 goto err_unreach; 457 rt = (struct rt6_info *) dst; 458 } 459 460 local = __ip_vs_is_local_route6(rt); 461 462 if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, 463 local))) { 464 IP_VS_DBG_RL("We are crossing local and non-local addresses" 465 " daddr=%pI6\n", daddr); 466 goto err_put; 467 } 468 
469 if (unlikely(local)) { 470 /* skb to local stack, preserve old route */ 471 if (!noref) 472 dst_release(&rt->dst); 473 return local; 474 } 475 476 /* MTU checking */ 477 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) 478 mtu = dst_mtu(&rt->dst); 479 else { 480 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 481 if (mtu < IPV6_MIN_MTU) { 482 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, 483 IPV6_MIN_MTU); 484 goto err_put; 485 } 486 maybe_update_pmtu(skb_af, skb, mtu); 487 } 488 489 if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) 490 goto err_put; 491 492 skb_dst_drop(skb); 493 if (noref) { 494 if (!local) 495 skb_dst_set_noref(skb, &rt->dst); 496 else 497 skb_dst_set(skb, dst_clone(&rt->dst)); 498 } else 499 skb_dst_set(skb, &rt->dst); 500 501 return local; 502 503 err_put: 504 if (!noref) 505 dst_release(&rt->dst); 506 return -1; 507 508 err_unreach: 509 /* The ip6_link_failure function requires the dev field to be set 510 * in order to get the net (further for the sake of fwmark 511 * reflection). 512 */ 513 if (!skb->dev) 514 skb->dev = skb_dst(skb)->dev; 515 516 dst_link_failure(skb); 517 return -1; 518 } 519 #endif 520 521 522 /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ 523 static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, 524 struct ip_vs_conn *cp) 525 { 526 int ret = NF_ACCEPT; 527 528 skb->ipvs_property = 1; 529 if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) 530 ret = ip_vs_confirm_conntrack(skb); 531 if (ret == NF_ACCEPT) { 532 nf_reset(skb); 533 skb_forward_csum(skb); 534 } 535 return ret; 536 } 537 538 /* In the event of a remote destination, it's possible that we would have 539 * matches against an old socket (particularly a TIME-WAIT socket). This 540 * causes havoc down the line (ip_local_out et. al. expect regular sockets 541 * and invalid memory accesses will happen) so simply drop the association 542 * in this case. 
543 */ 544 static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) 545 { 546 /* If dev is set, the packet came from the LOCAL_IN callback and 547 * not from a local TCP socket. 548 */ 549 if (skb->dev) 550 skb_orphan(skb); 551 } 552 553 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 554 static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, 555 struct ip_vs_conn *cp, int local) 556 { 557 int ret = NF_STOLEN; 558 559 skb->ipvs_property = 1; 560 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 561 ip_vs_notrack(skb); 562 else 563 ip_vs_update_conntrack(skb, cp, 1); 564 565 /* Remove the early_demux association unless it's bound for the 566 * exact same port and address on this host after translation. 567 */ 568 if (!local || cp->vport != cp->dport || 569 !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) 570 ip_vs_drop_early_demux_sk(skb); 571 572 if (!local) { 573 skb_forward_csum(skb); 574 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, 575 NULL, skb_dst(skb)->dev, dst_output); 576 } else 577 ret = NF_ACCEPT; 578 579 return ret; 580 } 581 582 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 583 static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, 584 struct ip_vs_conn *cp, int local) 585 { 586 int ret = NF_STOLEN; 587 588 skb->ipvs_property = 1; 589 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 590 ip_vs_notrack(skb); 591 if (!local) { 592 ip_vs_drop_early_demux_sk(skb); 593 skb_forward_csum(skb); 594 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, 595 NULL, skb_dst(skb)->dev, dst_output); 596 } else 597 ret = NF_ACCEPT; 598 return ret; 599 } 600 601 602 /* 603 * NULL transmitter (do nothing except return NF_ACCEPT) 604 */ 605 int 606 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 607 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 608 { 609 /* we do not touch skb and do not need pskb ptr */ 610 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 611 } 612 613 
/*
 *      Bypass transmitter
 *      Let packets bypass the destination when the destination is not
 *      available, it may be only used in transparent cache cluster.
 *
 *      The route is looked up for the packet's own daddr (no dest is
 *      passed) and only non-local routes are requested.  Always returns
 *      NF_STOLEN: the skb is either sent or freed here.
 */
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct iphdr  *iph = ip_hdr(skb);

	EnterFunction(10);

	rcu_read_lock();
	if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
			       IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
		goto tx_error;

	ip_send_check(iph);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
/* IPv6 bypass transmitter: same flow as ip_vs_bypass_xmit() using the
 * IPv6 route lookup; there is no header checksum to recompute.
 * Always returns NF_STOLEN.
 */
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);

	EnterFunction(10);

	rcu_read_lock();
	if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
				  &iph->daddr, NULL,
				  ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
		goto tx_error;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif

/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 *
 *      Rewrites the destination address to cp->daddr.ip (and lets the
 *      protocol's dnat_handler mangle the rest), then either sends the
 *      packet (NF_STOLEN) or, for a local destination, returns NF_ACCEPT
 *      via ip_vs_nat_send_or_cont().  On error the skb is freed and
 *      NF_STOLEN is returned.
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rtable *rt;		/* Route to the other host */
	int local, rc, was_input;

	EnterFunction(10);

	rcu_read_lock();
	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;

		/* The 16-bit value at offset ipvsh->len becomes the cport */
		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	/* Sampled before the route lookup, which may replace the skb's dst */
	was_input = rt_is_input_route(skb_rtable(skb));
	local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_RDR, NULL, ipvsh);
	if (local < 0)
		goto tx_error;
	rt = skb_rtable(skb);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
					 "ip_vs_nat_xmit(): "
					 "stopping DNAT to local address");
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off,
				 "ip_vs_nat_xmit(): stopping DNAT to loopback "
				 "address");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
		goto tx_error;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	rcu_read_unlock();

	LeaveFunction(10);
	return rc;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
/* IPv6 NAT transmitter: same flow as ip_vs_nat_xmit().  The client port
 * is only filled in when ipvsh->fragoffs is zero, and the "from world"
 * test uses skb->dev instead of the input-route flag.
 */
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rt6_info *rt;		/* Route to the other host */
	int local, rc;

	EnterFunction(10);

	rcu_read_lock();
	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
				      &cp->daddr.in6,
				      NULL, ipvsh, 0,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_RDR);
	if (local < 0)
		goto tx_error;
	rt = (struct rt6_info *) skb_dst(skb);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off,
					 "ip_vs_nat_xmit_v6(): "
					 "stopping DNAT to local address");
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off,
				 "ip_vs_nat_xmit_v6(): "
				 "stopping DNAT to loopback address");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
		goto tx_error;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
	rcu_read_unlock();

	LeaveFunction(10);
	return rc;

tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	rcu_read_unlock();
	return NF_STOLEN;
}
#endif

/* When forwarding a packet, we must ensure that we've got enough headroom
 * for the encapsulation packet in the skb.  This also gives us an
 * opportunity to figure out what the payload_len, dsfield, ttl, and df
 * values should be, so that we won't need to look at the old ip header
 * again
 *
 * @payload_len and @df may be NULL; the other output pointers are always
 * written.  Returns the skb to use from now on (a reallocated copy when
 * headroom was short or the skb was cloned), or ERR_PTR(-ENOMEM) after
 * freeing the original skb.
 */
static struct sk_buff *
ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
			   unsigned int max_headroom, __u8 *next_protocol,
			   __u32 *payload_len, __u8 *dsfield, __u8 *ttl,
			   __be16 *df)
{
	struct sk_buff *new_skb = NULL;
	struct iphdr *old_iph = NULL;
#ifdef CONFIG_IP_VS_IPV6
	struct ipv6hdr *old_ipv6h = NULL;
#endif

	ip_vs_drop_early_demux_sk(skb);

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
		new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb)
			goto error;
		/* Carry the socket ownership over to the new skb */
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		consume_skb(skb);
		skb = new_skb;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (skb_af == AF_INET6) {
		old_ipv6h = ipv6_hdr(skb);
		*next_protocol = IPPROTO_IPV6;
		if (payload_len)
			*payload_len =
				ntohs(old_ipv6h->payload_len) +
				sizeof(*old_ipv6h);
		*dsfield = ipv6_get_dsfield(old_ipv6h);
		*ttl = old_ipv6h->hop_limit;
		if (df)
			*df = 0;
	} else
#endif
	{
		old_iph = ip_hdr(skb);
		/* Copy DF, reset fragment offset and MF */
		if (df)
			*df = (old_iph->frag_off & htons(IP_DF));
		*next_protocol = IPPROTO_IPIP;

		/* fix old IP header checksum */
		ip_send_check(old_iph);
		*dsfield = ipv4_get_dsfield(old_iph);
		*ttl = old_iph->ttl;
		if (payload_len)
			*payload_len = ntohs(old_iph->tot_len);
	}

	return skb;
error:
	kfree_skb(skb);
	return ERR_PTR(-ENOMEM);
}

/* Map the outer (encapsulating) address family to the GSO type flag that
 * is handed to iptunnel_handle_offloads(); @orig_af is not used.
 */
static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
{
	switch (encaps_af) {
	case AF_INET:
		return SKB_GSO_IPXIP4;
	case AF_INET6:
		return SKB_GSO_IPXIP6;
	default:
		return 0;
	}
}

/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, process
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 *
 *   Returns NF_STOLEN, except for a local destination where the result
 *   of ip_vs_send_or_cont() with local=1 is returned.
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct netns_ipvs *ipvs = cp->ipvs;
	struct net *net = ipvs->net;
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	__u8 next_protocol = 0;
	__u8 dsfield = 0;
	__u8 ttl = 0;
	__be16 df = 0;
	__be16 *dfp = NULL;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int ret, local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_CONNECT |
				   IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	rt = skb_rtable(skb);
	tdev = rt->dst.dev;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
	dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
	skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
					 &next_protocol, NULL, &dsfield,
					 &ttl, dfp);
	/* On failure the helper has already freed the skb; tx_error checks
	 * IS_ERR() so it is not freed twice.
	 */
	if (IS_ERR(skb))
		goto tx_error;

	if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)))
		goto tx_error;

	/* The old network header becomes the payload of the new packet */
	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	next_protocol;
	iph->tos		=	dsfield;
	iph->daddr		=	cp->daddr.ip;
	iph->saddr		=	saddr;
	iph->ttl		=	ttl;
	ip_select_ident(net, skb, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(net, skb->sk, skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);
	rcu_read_unlock();

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error:
	if (!IS_ERR(skb))
		kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
/* IPv6 tunneling transmitter: encapsulates the packet in a new IPv6
 * header addressed to cp->daddr.in6.  The route lookup is done with
 * do_xfrm=1 and without IP_VS_RT_MODE_CONNECT.  Return values as for
 * ip_vs_tunnel_xmit().
 */
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct in6_addr saddr;		/* Source for tunnel */
	struct net_device *tdev;	/* Device to other host */
	__u8 next_protocol = 0;
	__u32 payload_len = 0;
	__u8 dsfield = 0;
	__u8 ttl = 0;
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int ret, local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
				      &cp->daddr.in6,
				      &saddr, ipvsh, 1,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_TUNNEL);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
	}

	rt = (struct rt6_info *) skb_dst(skb);
	tdev = rt->dst.dev;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
					 &next_protocol, &payload_len,
					 &dsfield, &ttl, NULL);
	/* On failure the helper has already freed the skb */
	if (IS_ERR(skb))
		goto tx_error;

	if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)))
		goto tx_error;

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	next_protocol;
	iph->payload_len	=	htons(payload_len);
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	ipv6_change_dsfield(iph, 0, dsfield);
	iph->daddr = cp->daddr.in6;
	iph->saddr = saddr;
	iph->hop_limit		=	ttl;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip6_local_out(cp->ipvs->net, skb->sk, skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);
	rcu_read_unlock();

	LeaveFunction(10);

	return NF_STOLEN;

tx_error:
	if (!IS_ERR(skb))
		kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif


/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 *
 *      The packet is sent unmodified (apart from a recomputed IP header
 *      checksum) over a route looked up with IP_VS_RT_MODE_KNOWN_NH.
 *      Returns NF_STOLEN, except for a local destination where the
 *      result of ip_vs_send_or_cont() with local=1 is returned.
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	int local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	ip_send_check(ip_hdr(skb));

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
/* IPv6 direct routing transmitter: same flow as ip_vs_dr_xmit() without
 * the header checksum step.
 */
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	int local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
				      &cp->daddr.in6,
				      NULL, ipvsh, 0,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_KNOWN_NH);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif


/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
		struct ip_vs_iphdr *iph)
{
	struct rtable	*rt;	/* Route to the other host */
	int rc;
	int local;
	int rt_mode, was_input;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp, iph);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */
	was_input = rt_is_input_route(skb_rtable(skb));

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	rcu_read_lock();
	local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
				   NULL, iph);
	if (local < 0)
		goto tx_error;
	rt = skb_rtable(skb);

	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI4\n",
				  __func__, &cp->daddr.ip);
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address?
*/ 1297 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { 1298 IP_VS_DBG(1, "%s(): " 1299 "stopping DNAT to loopback %pI4\n", 1300 __func__, &cp->daddr.ip); 1301 goto tx_error; 1302 } 1303 1304 /* copy-on-write the packet before mangling it */ 1305 if (!skb_make_writable(skb, offset)) 1306 goto tx_error; 1307 1308 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1309 goto tx_error; 1310 1311 ip_vs_nat_icmp(skb, pp, cp, 0); 1312 1313 /* Another hack: avoid icmp_send in ip_fragment */ 1314 skb->ignore_df = 1; 1315 1316 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); 1317 rcu_read_unlock(); 1318 goto out; 1319 1320 tx_error: 1321 kfree_skb(skb); 1322 rcu_read_unlock(); 1323 rc = NF_STOLEN; 1324 out: 1325 LeaveFunction(10); 1326 return rc; 1327 } 1328 1329 #ifdef CONFIG_IP_VS_IPV6 1330 int 1331 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1332 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1333 struct ip_vs_iphdr *ipvsh) 1334 { 1335 struct rt6_info *rt; /* Route to the other host */ 1336 int rc; 1337 int local; 1338 int rt_mode; 1339 1340 EnterFunction(10); 1341 1342 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1343 forwarded directly here, because there is no need to 1344 translate address/port back */ 1345 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1346 if (cp->packet_xmit) 1347 rc = cp->packet_xmit(skb, cp, pp, ipvsh); 1348 else 1349 rc = NF_ACCEPT; 1350 /* do not touch skb anymore */ 1351 atomic_inc(&cp->in_pkts); 1352 goto out; 1353 } 1354 1355 /* 1356 * mangle and send the packet here (only for VS/NAT) 1357 */ 1358 1359 /* LOCALNODE from FORWARD hook is not supported */ 1360 rt_mode = (hooknum != NF_INET_FORWARD) ? 
1361 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1362 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1363 rcu_read_lock(); 1364 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, 1365 &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); 1366 if (local < 0) 1367 goto tx_error; 1368 rt = (struct rt6_info *) skb_dst(skb); 1369 /* 1370 * Avoid duplicate tuple in reply direction for NAT traffic 1371 * to local address when connection is sync-ed 1372 */ 1373 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1374 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1375 enum ip_conntrack_info ctinfo; 1376 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 1377 1378 if (ct && !nf_ct_is_untracked(ct)) { 1379 IP_VS_DBG(10, "%s(): " 1380 "stopping DNAT to local address %pI6\n", 1381 __func__, &cp->daddr.in6); 1382 goto tx_error; 1383 } 1384 } 1385 #endif 1386 1387 /* From world but DNAT to loopback address? */ 1388 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 1389 ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { 1390 IP_VS_DBG(1, "%s(): " 1391 "stopping DNAT to loopback %pI6\n", 1392 __func__, &cp->daddr.in6); 1393 goto tx_error; 1394 } 1395 1396 /* copy-on-write the packet before mangling it */ 1397 if (!skb_make_writable(skb, offset)) 1398 goto tx_error; 1399 1400 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1401 goto tx_error; 1402 1403 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1404 1405 /* Another hack: avoid icmp_send in ip_fragment */ 1406 skb->ignore_df = 1; 1407 1408 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); 1409 rcu_read_unlock(); 1410 goto out; 1411 1412 tx_error: 1413 kfree_skb(skb); 1414 rcu_read_unlock(); 1415 rc = NF_STOLEN; 1416 out: 1417 LeaveFunction(10); 1418 return rc; 1419 } 1420 #endif 1421