1 /* 2 * ip_vs_xmit.c: various packet transmitters for IPVS 3 * 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5 * Julian Anastasov <ja@ssi.bg> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Changes: 13 * 14 * Description of forwarding methods: 15 * - all transmitters are called from LOCAL_IN (remote clients) and 16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD 17 * - not all connections have destination server, for example, 18 * connections in backup server when fwmark is used 19 * - bypass connections use daddr from packet 20 * - we can use dst without ref while sending in RCU section, we use 21 * ref when returning NF_ACCEPT for NAT-ed packet via loopback 22 * LOCAL_OUT rules: 23 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) 24 * - skb->pkt_type is not set yet 25 * - the only place where we can see skb->sk != NULL 26 */ 27 28 #define KMSG_COMPONENT "IPVS" 29 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 30 31 #include <linux/kernel.h> 32 #include <linux/slab.h> 33 #include <linux/tcp.h> /* for tcphdr */ 34 #include <net/ip.h> 35 #include <net/tcp.h> /* for csum_tcpudp_magic */ 36 #include <net/udp.h> 37 #include <net/icmp.h> /* for icmp_send */ 38 #include <net/route.h> /* for ip_route_output */ 39 #include <net/ipv6.h> 40 #include <net/ip6_route.h> 41 #include <net/ip_tunnels.h> 42 #include <net/addrconf.h> 43 #include <linux/icmpv6.h> 44 #include <linux/netfilter.h> 45 #include <linux/netfilter_ipv4.h> 46 47 #include <net/ip_vs.h> 48 49 enum { 50 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */ 51 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */ 52 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to 53 * local 54 */ 55 IP_VS_RT_MODE_CONNECT = 8, /* Always bind 
route to saddr */ 56 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ 57 IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ 58 }; 59 60 static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) 61 { 62 return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); 63 } 64 65 static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) 66 { 67 kfree(dest_dst); 68 } 69 70 /* 71 * Destination cache to speed up outgoing route lookup 72 */ 73 static inline void 74 __ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, 75 struct dst_entry *dst, u32 dst_cookie) 76 { 77 struct ip_vs_dest_dst *old; 78 79 old = rcu_dereference_protected(dest->dest_dst, 80 lockdep_is_held(&dest->dst_lock)); 81 82 if (dest_dst) { 83 dest_dst->dst_cache = dst; 84 dest_dst->dst_cookie = dst_cookie; 85 } 86 rcu_assign_pointer(dest->dest_dst, dest_dst); 87 88 if (old) 89 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 90 } 91 92 static inline struct ip_vs_dest_dst * 93 __ip_vs_dst_check(struct ip_vs_dest *dest) 94 { 95 struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); 96 struct dst_entry *dst; 97 98 if (!dest_dst) 99 return NULL; 100 dst = dest_dst->dst_cache; 101 if (dst->obsolete && 102 dst->ops->check(dst, dest_dst->dst_cookie) == NULL) 103 return NULL; 104 return dest_dst; 105 } 106 107 static inline bool 108 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) 109 { 110 if (IP6CB(skb)->frag_max_size) { 111 /* frag_max_size tell us that, this packet have been 112 * defragmented by netfilter IPv6 conntrack module. 
113 */ 114 if (IP6CB(skb)->frag_max_size > mtu) 115 return true; /* largest fragment violate MTU */ 116 } 117 else if (skb->len > mtu && !skb_is_gso(skb)) { 118 return true; /* Packet size violate MTU size */ 119 } 120 return false; 121 } 122 123 /* Get route to daddr, update *saddr, optionally bind route to saddr */ 124 static struct rtable *do_output_route4(struct net *net, __be32 daddr, 125 int rt_mode, __be32 *saddr) 126 { 127 struct flowi4 fl4; 128 struct rtable *rt; 129 int loop = 0; 130 131 memset(&fl4, 0, sizeof(fl4)); 132 fl4.daddr = daddr; 133 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 134 FLOWI_FLAG_KNOWN_NH : 0; 135 136 retry: 137 rt = ip_route_output_key(net, &fl4); 138 if (IS_ERR(rt)) { 139 /* Invalid saddr ? */ 140 if (PTR_ERR(rt) == -EINVAL && *saddr && 141 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { 142 *saddr = 0; 143 flowi4_update_output(&fl4, 0, 0, daddr, 0); 144 goto retry; 145 } 146 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); 147 return NULL; 148 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { 149 ip_rt_put(rt); 150 *saddr = fl4.saddr; 151 flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); 152 loop++; 153 goto retry; 154 } 155 *saddr = fl4.saddr; 156 return rt; 157 } 158 159 #ifdef CONFIG_IP_VS_IPV6 160 static inline int __ip_vs_is_local_route6(struct rt6_info *rt) 161 { 162 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; 163 } 164 #endif 165 166 static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, 167 int rt_mode, 168 bool new_rt_is_local) 169 { 170 bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); 171 bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL); 172 bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR); 173 bool source_is_loopback; 174 bool old_rt_is_local; 175 176 #ifdef CONFIG_IP_VS_IPV6 177 if (skb_af == AF_INET6) { 178 int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); 179 180 source_is_loopback = 181 
(!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 182 (addr_type & IPV6_ADDR_LOOPBACK); 183 old_rt_is_local = __ip_vs_is_local_route6( 184 (struct rt6_info *)skb_dst(skb)); 185 } else 186 #endif 187 { 188 source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr); 189 old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 190 } 191 192 if (unlikely(new_rt_is_local)) { 193 if (!rt_mode_allow_local) 194 return true; 195 if (!rt_mode_allow_redirect && !old_rt_is_local) 196 return true; 197 } else { 198 if (!rt_mode_allow_non_local) 199 return true; 200 if (source_is_loopback) 201 return true; 202 } 203 return false; 204 } 205 206 static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) 207 { 208 struct sock *sk = skb->sk; 209 struct rtable *ort = skb_rtable(skb); 210 211 if (!skb->dev && sk && sk_fullsock(sk)) 212 ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); 213 } 214 215 static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, 216 int rt_mode, 217 struct ip_vs_iphdr *ipvsh, 218 struct sk_buff *skb, int mtu) 219 { 220 #ifdef CONFIG_IP_VS_IPV6 221 if (skb_af == AF_INET6) { 222 struct net *net = ipvs->net; 223 224 if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { 225 if (!skb->dev) 226 skb->dev = net->loopback_dev; 227 /* only send ICMP too big on first fragment */ 228 if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh)) 229 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 230 IP_VS_DBG(1, "frag needed for %pI6c\n", 231 &ipv6_hdr(skb)->saddr); 232 return false; 233 } 234 } else 235 #endif 236 { 237 /* If we're going to tunnel the packet and pmtu discovery 238 * is disabled, we'll just fragment it anyway 239 */ 240 if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs)) 241 return true; 242 243 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && 244 skb->len > mtu && !skb_is_gso(skb) && 245 !ip_vs_iph_icmp(ipvsh))) { 246 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 247 htonl(mtu)); 248 IP_VS_DBG(1, "frag needed 
for %pI4\n", 249 &ip_hdr(skb)->saddr); 250 return false; 251 } 252 } 253 254 return true; 255 } 256 257 static inline bool decrement_ttl(struct netns_ipvs *ipvs, 258 int skb_af, 259 struct sk_buff *skb) 260 { 261 struct net *net = ipvs->net; 262 263 #ifdef CONFIG_IP_VS_IPV6 264 if (skb_af == AF_INET6) { 265 struct dst_entry *dst = skb_dst(skb); 266 267 /* check and decrement ttl */ 268 if (ipv6_hdr(skb)->hop_limit <= 1) { 269 /* Force OUTPUT device used as source address */ 270 skb->dev = dst->dev; 271 icmpv6_send(skb, ICMPV6_TIME_EXCEED, 272 ICMPV6_EXC_HOPLIMIT, 0); 273 __IP6_INC_STATS(net, ip6_dst_idev(dst), 274 IPSTATS_MIB_INHDRERRORS); 275 276 return false; 277 } 278 279 /* don't propagate ttl change to cloned packets */ 280 if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) 281 return false; 282 283 ipv6_hdr(skb)->hop_limit--; 284 } else 285 #endif 286 { 287 if (ip_hdr(skb)->ttl <= 1) { 288 /* Tell the sender its packet died... */ 289 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); 290 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); 291 return false; 292 } 293 294 /* don't propagate ttl change to cloned packets */ 295 if (!skb_make_writable(skb, sizeof(struct iphdr))) 296 return false; 297 298 /* Decrease ttl */ 299 ip_decrease_ttl(ip_hdr(skb)); 300 } 301 302 return true; 303 } 304 305 /* Get route to destination or remote server */ 306 static int 307 __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, 308 struct ip_vs_dest *dest, 309 __be32 daddr, int rt_mode, __be32 *ret_saddr, 310 struct ip_vs_iphdr *ipvsh) 311 { 312 struct net *net = ipvs->net; 313 struct ip_vs_dest_dst *dest_dst; 314 struct rtable *rt; /* Route to the other host */ 315 int mtu; 316 int local, noref = 1; 317 318 if (dest) { 319 dest_dst = __ip_vs_dst_check(dest); 320 if (likely(dest_dst)) 321 rt = (struct rtable *) dest_dst->dst_cache; 322 else { 323 dest_dst = ip_vs_dest_dst_alloc(); 324 spin_lock_bh(&dest->dst_lock); 325 if (!dest_dst) { 326 
__ip_vs_dst_set(dest, NULL, NULL, 0); 327 spin_unlock_bh(&dest->dst_lock); 328 goto err_unreach; 329 } 330 rt = do_output_route4(net, dest->addr.ip, rt_mode, 331 &dest_dst->dst_saddr.ip); 332 if (!rt) { 333 __ip_vs_dst_set(dest, NULL, NULL, 0); 334 spin_unlock_bh(&dest->dst_lock); 335 ip_vs_dest_dst_free(dest_dst); 336 goto err_unreach; 337 } 338 __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); 339 spin_unlock_bh(&dest->dst_lock); 340 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", 341 &dest->addr.ip, &dest_dst->dst_saddr.ip, 342 atomic_read(&rt->dst.__refcnt)); 343 } 344 if (ret_saddr) 345 *ret_saddr = dest_dst->dst_saddr.ip; 346 } else { 347 __be32 saddr = htonl(INADDR_ANY); 348 349 noref = 0; 350 351 /* For such unconfigured boxes avoid many route lookups 352 * for performance reasons because we do not remember saddr 353 */ 354 rt_mode &= ~IP_VS_RT_MODE_CONNECT; 355 rt = do_output_route4(net, daddr, rt_mode, &saddr); 356 if (!rt) 357 goto err_unreach; 358 if (ret_saddr) 359 *ret_saddr = saddr; 360 } 361 362 local = (rt->rt_flags & RTCF_LOCAL) ? 
1 : 0; 363 if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, 364 local))) { 365 IP_VS_DBG_RL("We are crossing local and non-local addresses" 366 " daddr=%pI4\n", &daddr); 367 goto err_put; 368 } 369 370 if (unlikely(local)) { 371 /* skb to local stack, preserve old route */ 372 if (!noref) 373 ip_rt_put(rt); 374 return local; 375 } 376 377 if (!decrement_ttl(ipvs, skb_af, skb)) 378 goto err_put; 379 380 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { 381 mtu = dst_mtu(&rt->dst); 382 } else { 383 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 384 if (mtu < 68) { 385 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 386 goto err_put; 387 } 388 maybe_update_pmtu(skb_af, skb, mtu); 389 } 390 391 if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) 392 goto err_put; 393 394 skb_dst_drop(skb); 395 if (noref) { 396 if (!local) 397 skb_dst_set_noref(skb, &rt->dst); 398 else 399 skb_dst_set(skb, dst_clone(&rt->dst)); 400 } else 401 skb_dst_set(skb, &rt->dst); 402 403 return local; 404 405 err_put: 406 if (!noref) 407 ip_rt_put(rt); 408 return -1; 409 410 err_unreach: 411 dst_link_failure(skb); 412 return -1; 413 } 414 415 #ifdef CONFIG_IP_VS_IPV6 416 static struct dst_entry * 417 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, 418 struct in6_addr *ret_saddr, int do_xfrm, int rt_mode) 419 { 420 struct dst_entry *dst; 421 struct flowi6 fl6 = { 422 .daddr = *daddr, 423 }; 424 425 if (rt_mode & IP_VS_RT_MODE_KNOWN_NH) 426 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; 427 428 dst = ip6_route_output(net, NULL, &fl6); 429 if (dst->error) 430 goto out_err; 431 if (!ret_saddr) 432 return dst; 433 if (ipv6_addr_any(&fl6.saddr) && 434 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, 435 &fl6.daddr, 0, &fl6.saddr) < 0) 436 goto out_err; 437 if (do_xfrm) { 438 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); 439 if (IS_ERR(dst)) { 440 dst = NULL; 441 goto out_err; 442 } 443 } 444 *ret_saddr = fl6.saddr; 445 return dst; 446 447 
out_err: 448 dst_release(dst); 449 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); 450 return NULL; 451 } 452 453 /* 454 * Get route to destination or remote server 455 */ 456 static int 457 __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, 458 struct ip_vs_dest *dest, 459 struct in6_addr *daddr, struct in6_addr *ret_saddr, 460 struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) 461 { 462 struct net *net = ipvs->net; 463 struct ip_vs_dest_dst *dest_dst; 464 struct rt6_info *rt; /* Route to the other host */ 465 struct dst_entry *dst; 466 int mtu; 467 int local, noref = 1; 468 469 if (dest) { 470 dest_dst = __ip_vs_dst_check(dest); 471 if (likely(dest_dst)) 472 rt = (struct rt6_info *) dest_dst->dst_cache; 473 else { 474 u32 cookie; 475 476 dest_dst = ip_vs_dest_dst_alloc(); 477 spin_lock_bh(&dest->dst_lock); 478 if (!dest_dst) { 479 __ip_vs_dst_set(dest, NULL, NULL, 0); 480 spin_unlock_bh(&dest->dst_lock); 481 goto err_unreach; 482 } 483 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 484 &dest_dst->dst_saddr.in6, 485 do_xfrm, rt_mode); 486 if (!dst) { 487 __ip_vs_dst_set(dest, NULL, NULL, 0); 488 spin_unlock_bh(&dest->dst_lock); 489 ip_vs_dest_dst_free(dest_dst); 490 goto err_unreach; 491 } 492 rt = (struct rt6_info *) dst; 493 cookie = rt6_get_cookie(rt); 494 __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); 495 spin_unlock_bh(&dest->dst_lock); 496 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 497 &dest->addr.in6, &dest_dst->dst_saddr.in6, 498 atomic_read(&rt->dst.__refcnt)); 499 } 500 if (ret_saddr) 501 *ret_saddr = dest_dst->dst_saddr.in6; 502 } else { 503 noref = 0; 504 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm, 505 rt_mode); 506 if (!dst) 507 goto err_unreach; 508 rt = (struct rt6_info *) dst; 509 } 510 511 local = __ip_vs_is_local_route6(rt); 512 513 if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, 514 local))) { 515 IP_VS_DBG_RL("We are crossing local and non-local 
addresses" 516 " daddr=%pI6\n", daddr); 517 goto err_put; 518 } 519 520 if (unlikely(local)) { 521 /* skb to local stack, preserve old route */ 522 if (!noref) 523 dst_release(&rt->dst); 524 return local; 525 } 526 527 if (!decrement_ttl(ipvs, skb_af, skb)) 528 goto err_put; 529 530 /* MTU checking */ 531 if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) 532 mtu = dst_mtu(&rt->dst); 533 else { 534 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 535 if (mtu < IPV6_MIN_MTU) { 536 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, 537 IPV6_MIN_MTU); 538 goto err_put; 539 } 540 maybe_update_pmtu(skb_af, skb, mtu); 541 } 542 543 if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) 544 goto err_put; 545 546 skb_dst_drop(skb); 547 if (noref) { 548 if (!local) 549 skb_dst_set_noref(skb, &rt->dst); 550 else 551 skb_dst_set(skb, dst_clone(&rt->dst)); 552 } else 553 skb_dst_set(skb, &rt->dst); 554 555 return local; 556 557 err_put: 558 if (!noref) 559 dst_release(&rt->dst); 560 return -1; 561 562 err_unreach: 563 /* The ip6_link_failure function requires the dev field to be set 564 * in order to get the net (further for the sake of fwmark 565 * reflection). 566 */ 567 if (!skb->dev) 568 skb->dev = skb_dst(skb)->dev; 569 570 dst_link_failure(skb); 571 return -1; 572 } 573 #endif 574 575 576 /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ 577 static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, 578 struct ip_vs_conn *cp) 579 { 580 int ret = NF_ACCEPT; 581 582 skb->ipvs_property = 1; 583 if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) 584 ret = ip_vs_confirm_conntrack(skb); 585 if (ret == NF_ACCEPT) { 586 nf_reset(skb); 587 skb_forward_csum(skb); 588 } 589 return ret; 590 } 591 592 /* In the event of a remote destination, it's possible that we would have 593 * matches against an old socket (particularly a TIME-WAIT socket). This 594 * causes havoc down the line (ip_local_out et. al. 
expect regular sockets 595 * and invalid memory accesses will happen) so simply drop the association 596 * in this case. 597 */ 598 static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) 599 { 600 /* If dev is set, the packet came from the LOCAL_IN callback and 601 * not from a local TCP socket. 602 */ 603 if (skb->dev) 604 skb_orphan(skb); 605 } 606 607 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 608 static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, 609 struct ip_vs_conn *cp, int local) 610 { 611 int ret = NF_STOLEN; 612 613 skb->ipvs_property = 1; 614 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 615 ip_vs_notrack(skb); 616 else 617 ip_vs_update_conntrack(skb, cp, 1); 618 619 /* Remove the early_demux association unless it's bound for the 620 * exact same port and address on this host after translation. 621 */ 622 if (!local || cp->vport != cp->dport || 623 !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) 624 ip_vs_drop_early_demux_sk(skb); 625 626 if (!local) { 627 skb_forward_csum(skb); 628 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, 629 NULL, skb_dst(skb)->dev, dst_output); 630 } else 631 ret = NF_ACCEPT; 632 633 return ret; 634 } 635 636 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 637 static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, 638 struct ip_vs_conn *cp, int local) 639 { 640 int ret = NF_STOLEN; 641 642 skb->ipvs_property = 1; 643 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 644 ip_vs_notrack(skb); 645 if (!local) { 646 ip_vs_drop_early_demux_sk(skb); 647 skb_forward_csum(skb); 648 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, 649 NULL, skb_dst(skb)->dev, dst_output); 650 } else 651 ret = NF_ACCEPT; 652 return ret; 653 } 654 655 656 /* 657 * NULL transmitter (do nothing except return NF_ACCEPT) 658 */ 659 int 660 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 661 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 662 { 663 
/* we do not touch skb and do not need pskb ptr */ 664 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 665 } 666 667 668 /* 669 * Bypass transmitter 670 * Let packets bypass the destination when the destination is not 671 * available, it may be only used in transparent cache cluster. 672 */ 673 int 674 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 675 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 676 { 677 struct iphdr *iph = ip_hdr(skb); 678 679 EnterFunction(10); 680 681 if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr, 682 IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) 683 goto tx_error; 684 685 ip_send_check(iph); 686 687 /* Another hack: avoid icmp_send in ip_fragment */ 688 skb->ignore_df = 1; 689 690 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); 691 692 LeaveFunction(10); 693 return NF_STOLEN; 694 695 tx_error: 696 kfree_skb(skb); 697 LeaveFunction(10); 698 return NF_STOLEN; 699 } 700 701 #ifdef CONFIG_IP_VS_IPV6 702 int 703 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 704 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 705 { 706 struct ipv6hdr *iph = ipv6_hdr(skb); 707 708 EnterFunction(10); 709 710 if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL, 711 &iph->daddr, NULL, 712 ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) 713 goto tx_error; 714 715 /* Another hack: avoid icmp_send in ip_fragment */ 716 skb->ignore_df = 1; 717 718 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); 719 720 LeaveFunction(10); 721 return NF_STOLEN; 722 723 tx_error: 724 kfree_skb(skb); 725 LeaveFunction(10); 726 return NF_STOLEN; 727 } 728 #endif 729 730 /* 731 * NAT transmitter (only for outside-to-inside nat forwarding) 732 * Not used for related ICMP 733 */ 734 int 735 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 736 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 737 { 738 struct rtable *rt; /* Route to the other host */ 739 int local, rc, was_input; 740 741 EnterFunction(10); 742 743 /* check if 
it is a connection of no-client-port */ 744 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 745 __be16 _pt, *p; 746 747 p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); 748 if (p == NULL) 749 goto tx_error; 750 ip_vs_conn_fill_cport(cp, *p); 751 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 752 } 753 754 was_input = rt_is_input_route(skb_rtable(skb)); 755 local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, 756 IP_VS_RT_MODE_LOCAL | 757 IP_VS_RT_MODE_NON_LOCAL | 758 IP_VS_RT_MODE_RDR, NULL, ipvsh); 759 if (local < 0) 760 goto tx_error; 761 rt = skb_rtable(skb); 762 /* 763 * Avoid duplicate tuple in reply direction for NAT traffic 764 * to local address when connection is sync-ed 765 */ 766 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 767 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 768 enum ip_conntrack_info ctinfo; 769 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 770 771 if (ct) { 772 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off, 773 "ip_vs_nat_xmit(): " 774 "stopping DNAT to local address"); 775 goto tx_error; 776 } 777 } 778 #endif 779 780 /* From world but DNAT to loopback address? */ 781 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { 782 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off, 783 "ip_vs_nat_xmit(): stopping DNAT to loopback " 784 "address"); 785 goto tx_error; 786 } 787 788 /* copy-on-write the packet before mangling it */ 789 if (!skb_make_writable(skb, sizeof(struct iphdr))) 790 goto tx_error; 791 792 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 793 goto tx_error; 794 795 /* mangle the packet */ 796 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) 797 goto tx_error; 798 ip_hdr(skb)->daddr = cp->daddr.ip; 799 ip_send_check(ip_hdr(skb)); 800 801 IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT"); 802 803 /* FIXME: when application helper enlarges the packet and the length 804 is larger than the MTU of outgoing device, there will be still 805 MTU problem. 
*/ 806 807 /* Another hack: avoid icmp_send in ip_fragment */ 808 skb->ignore_df = 1; 809 810 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); 811 812 LeaveFunction(10); 813 return rc; 814 815 tx_error: 816 kfree_skb(skb); 817 LeaveFunction(10); 818 return NF_STOLEN; 819 } 820 821 #ifdef CONFIG_IP_VS_IPV6 822 int 823 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 824 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 825 { 826 struct rt6_info *rt; /* Route to the other host */ 827 int local, rc; 828 829 EnterFunction(10); 830 831 /* check if it is a connection of no-client-port */ 832 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { 833 __be16 _pt, *p; 834 p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt); 835 if (p == NULL) 836 goto tx_error; 837 ip_vs_conn_fill_cport(cp, *p); 838 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 839 } 840 841 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, 842 &cp->daddr.in6, 843 NULL, ipvsh, 0, 844 IP_VS_RT_MODE_LOCAL | 845 IP_VS_RT_MODE_NON_LOCAL | 846 IP_VS_RT_MODE_RDR); 847 if (local < 0) 848 goto tx_error; 849 rt = (struct rt6_info *) skb_dst(skb); 850 /* 851 * Avoid duplicate tuple in reply direction for NAT traffic 852 * to local address when connection is sync-ed 853 */ 854 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 855 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 856 enum ip_conntrack_info ctinfo; 857 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 858 859 if (ct) { 860 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off, 861 "ip_vs_nat_xmit_v6(): " 862 "stopping DNAT to local address"); 863 goto tx_error; 864 } 865 } 866 #endif 867 868 /* From world but DNAT to loopback address? 
*/ 869 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 870 ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { 871 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off, 872 "ip_vs_nat_xmit_v6(): " 873 "stopping DNAT to loopback address"); 874 goto tx_error; 875 } 876 877 /* copy-on-write the packet before mangling it */ 878 if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) 879 goto tx_error; 880 881 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 882 goto tx_error; 883 884 /* mangle the packet */ 885 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) 886 goto tx_error; 887 ipv6_hdr(skb)->daddr = cp->daddr.in6; 888 889 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT"); 890 891 /* FIXME: when application helper enlarges the packet and the length 892 is larger than the MTU of outgoing device, there will be still 893 MTU problem. */ 894 895 /* Another hack: avoid icmp_send in ip_fragment */ 896 skb->ignore_df = 1; 897 898 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); 899 900 LeaveFunction(10); 901 return rc; 902 903 tx_error: 904 LeaveFunction(10); 905 kfree_skb(skb); 906 return NF_STOLEN; 907 } 908 #endif 909 910 /* When forwarding a packet, we must ensure that we've got enough headroom 911 * for the encapsulation packet in the skb. 
This also gives us an 912 * opportunity to figure out what the payload_len, dsfield, ttl, and df 913 * values should be, so that we won't need to look at the old ip header 914 * again 915 */ 916 static struct sk_buff * 917 ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, 918 unsigned int max_headroom, __u8 *next_protocol, 919 __u32 *payload_len, __u8 *dsfield, __u8 *ttl, 920 __be16 *df) 921 { 922 struct sk_buff *new_skb = NULL; 923 struct iphdr *old_iph = NULL; 924 __u8 old_dsfield; 925 #ifdef CONFIG_IP_VS_IPV6 926 struct ipv6hdr *old_ipv6h = NULL; 927 #endif 928 929 ip_vs_drop_early_demux_sk(skb); 930 931 if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { 932 new_skb = skb_realloc_headroom(skb, max_headroom); 933 if (!new_skb) 934 goto error; 935 if (skb->sk) 936 skb_set_owner_w(new_skb, skb->sk); 937 consume_skb(skb); 938 skb = new_skb; 939 } 940 941 #ifdef CONFIG_IP_VS_IPV6 942 if (skb_af == AF_INET6) { 943 old_ipv6h = ipv6_hdr(skb); 944 *next_protocol = IPPROTO_IPV6; 945 if (payload_len) 946 *payload_len = 947 ntohs(old_ipv6h->payload_len) + 948 sizeof(*old_ipv6h); 949 old_dsfield = ipv6_get_dsfield(old_ipv6h); 950 *ttl = old_ipv6h->hop_limit; 951 if (df) 952 *df = 0; 953 } else 954 #endif 955 { 956 old_iph = ip_hdr(skb); 957 /* Copy DF, reset fragment offset and MF */ 958 if (df) 959 *df = (old_iph->frag_off & htons(IP_DF)); 960 *next_protocol = IPPROTO_IPIP; 961 962 /* fix old IP header checksum */ 963 ip_send_check(old_iph); 964 old_dsfield = ipv4_get_dsfield(old_iph); 965 *ttl = old_iph->ttl; 966 if (payload_len) 967 *payload_len = ntohs(old_iph->tot_len); 968 } 969 970 /* Implement full-functionality option for ECN encapsulation */ 971 *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield); 972 973 return skb; 974 error: 975 kfree_skb(skb); 976 return ERR_PTR(-ENOMEM); 977 } 978 979 static inline int __tun_gso_type_mask(int encaps_af, int orig_af) 980 { 981 switch (encaps_af) { 982 case AF_INET: 983 return SKB_GSO_IPXIP4; 984 case 
AF_INET6: 985 return SKB_GSO_IPXIP6; 986 default: 987 return 0; 988 } 989 } 990 991 /* 992 * IP Tunneling transmitter 993 * 994 * This function encapsulates the packet in a new IP packet, its 995 * destination will be set to cp->daddr. Most code of this function 996 * is taken from ipip.c. 997 * 998 * It is used in VS/TUN cluster. The load balancer selects a real 999 * server from a cluster based on a scheduling algorithm, 1000 * encapsulates the request packet and forwards it to the selected 1001 * server. For example, all real servers are configured with 1002 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives 1003 * the encapsulated packet, it will decapsulate the packet, process 1004 * the request and return the response packets directly to the client 1005 * without passing the load balancer. This can greatly increase the 1006 * scalability of virtual server. 1007 * 1008 * Used for ANY protocol 1009 */ 1010 int 1011 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1012 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1013 { 1014 struct netns_ipvs *ipvs = cp->ipvs; 1015 struct net *net = ipvs->net; 1016 struct rtable *rt; /* Route to the other host */ 1017 __be32 saddr; /* Source for tunnel */ 1018 struct net_device *tdev; /* Device to other host */ 1019 __u8 next_protocol = 0; 1020 __u8 dsfield = 0; 1021 __u8 ttl = 0; 1022 __be16 df = 0; 1023 __be16 *dfp = NULL; 1024 struct iphdr *iph; /* Our new IP header */ 1025 unsigned int max_headroom; /* The extra header space needed */ 1026 int ret, local; 1027 1028 EnterFunction(10); 1029 1030 local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip, 1031 IP_VS_RT_MODE_LOCAL | 1032 IP_VS_RT_MODE_NON_LOCAL | 1033 IP_VS_RT_MODE_CONNECT | 1034 IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh); 1035 if (local < 0) 1036 goto tx_error; 1037 if (local) 1038 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 1039 1040 rt = skb_rtable(skb); 1041 tdev = rt->dst.dev; 1042 1043 /* 1044 * Okay, 
now see if we can stuff it in the buffer as-is. 1045 */ 1046 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); 1047 1048 /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ 1049 dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL; 1050 skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, 1051 &next_protocol, NULL, &dsfield, 1052 &ttl, dfp); 1053 if (IS_ERR(skb)) 1054 goto tx_error; 1055 1056 if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af))) 1057 goto tx_error; 1058 1059 skb->transport_header = skb->network_header; 1060 1061 skb_push(skb, sizeof(struct iphdr)); 1062 skb_reset_network_header(skb); 1063 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1064 1065 /* 1066 * Push down and install the IPIP header. 1067 */ 1068 iph = ip_hdr(skb); 1069 iph->version = 4; 1070 iph->ihl = sizeof(struct iphdr)>>2; 1071 iph->frag_off = df; 1072 iph->protocol = next_protocol; 1073 iph->tos = dsfield; 1074 iph->daddr = cp->daddr.ip; 1075 iph->saddr = saddr; 1076 iph->ttl = ttl; 1077 ip_select_ident(net, skb, NULL); 1078 1079 /* Another hack: avoid icmp_send in ip_fragment */ 1080 skb->ignore_df = 1; 1081 1082 ret = ip_vs_tunnel_xmit_prepare(skb, cp); 1083 if (ret == NF_ACCEPT) 1084 ip_local_out(net, skb->sk, skb); 1085 else if (ret == NF_DROP) 1086 kfree_skb(skb); 1087 1088 LeaveFunction(10); 1089 1090 return NF_STOLEN; 1091 1092 tx_error: 1093 if (!IS_ERR(skb)) 1094 kfree_skb(skb); 1095 LeaveFunction(10); 1096 return NF_STOLEN; 1097 } 1098 1099 #ifdef CONFIG_IP_VS_IPV6 1100 int 1101 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1102 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1103 { 1104 struct rt6_info *rt; /* Route to the other host */ 1105 struct in6_addr saddr; /* Source for tunnel */ 1106 struct net_device *tdev; /* Device to other host */ 1107 __u8 next_protocol = 0; 1108 __u32 payload_len = 0; 1109 __u8 dsfield = 0; 1110 __u8 ttl = 0; 1111 struct ipv6hdr *iph; /* Our new IP header */ 1112 
unsigned int max_headroom; /* The extra header space needed */ 1113 int ret, local; 1114 1115 EnterFunction(10); 1116 1117 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, 1118 &cp->daddr.in6, 1119 &saddr, ipvsh, 1, 1120 IP_VS_RT_MODE_LOCAL | 1121 IP_VS_RT_MODE_NON_LOCAL | 1122 IP_VS_RT_MODE_TUNNEL); 1123 if (local < 0) 1124 goto tx_error; 1125 if (local) 1126 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); 1127 1128 rt = (struct rt6_info *) skb_dst(skb); 1129 tdev = rt->dst.dev; 1130 1131 /* 1132 * Okay, now see if we can stuff it in the buffer as-is. 1133 */ 1134 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); 1135 1136 skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, 1137 &next_protocol, &payload_len, 1138 &dsfield, &ttl, NULL); 1139 if (IS_ERR(skb)) 1140 goto tx_error; 1141 1142 if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af))) 1143 goto tx_error; 1144 1145 skb->transport_header = skb->network_header; 1146 1147 skb_push(skb, sizeof(struct ipv6hdr)); 1148 skb_reset_network_header(skb); 1149 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1150 1151 /* 1152 * Push down and install the IPIP header. 
1153 */ 1154 iph = ipv6_hdr(skb); 1155 iph->version = 6; 1156 iph->nexthdr = next_protocol; 1157 iph->payload_len = htons(payload_len); 1158 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); 1159 ipv6_change_dsfield(iph, 0, dsfield); 1160 iph->daddr = cp->daddr.in6; 1161 iph->saddr = saddr; 1162 iph->hop_limit = ttl; 1163 1164 /* Another hack: avoid icmp_send in ip_fragment */ 1165 skb->ignore_df = 1; 1166 1167 ret = ip_vs_tunnel_xmit_prepare(skb, cp); 1168 if (ret == NF_ACCEPT) 1169 ip6_local_out(cp->ipvs->net, skb->sk, skb); 1170 else if (ret == NF_DROP) 1171 kfree_skb(skb); 1172 1173 LeaveFunction(10); 1174 1175 return NF_STOLEN; 1176 1177 tx_error: 1178 if (!IS_ERR(skb)) 1179 kfree_skb(skb); 1180 LeaveFunction(10); 1181 return NF_STOLEN; 1182 } 1183 #endif 1184 1185 1186 /* 1187 * Direct Routing transmitter 1188 * Used for ANY protocol 1189 */ 1190 int 1191 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1192 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1193 { 1194 int local; 1195 1196 EnterFunction(10); 1197 1198 local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, 1199 IP_VS_RT_MODE_LOCAL | 1200 IP_VS_RT_MODE_NON_LOCAL | 1201 IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); 1202 if (local < 0) 1203 goto tx_error; 1204 if (local) 1205 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 1206 1207 ip_send_check(ip_hdr(skb)); 1208 1209 /* Another hack: avoid icmp_send in ip_fragment */ 1210 skb->ignore_df = 1; 1211 1212 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); 1213 1214 LeaveFunction(10); 1215 return NF_STOLEN; 1216 1217 tx_error: 1218 kfree_skb(skb); 1219 LeaveFunction(10); 1220 return NF_STOLEN; 1221 } 1222 1223 #ifdef CONFIG_IP_VS_IPV6 1224 int 1225 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1226 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1227 { 1228 int local; 1229 1230 EnterFunction(10); 1231 1232 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, 1233 &cp->daddr.in6, 1234 NULL, 
ipvsh, 0, 1235 IP_VS_RT_MODE_LOCAL | 1236 IP_VS_RT_MODE_NON_LOCAL | 1237 IP_VS_RT_MODE_KNOWN_NH); 1238 if (local < 0) 1239 goto tx_error; 1240 if (local) 1241 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); 1242 1243 /* Another hack: avoid icmp_send in ip_fragment */ 1244 skb->ignore_df = 1; 1245 1246 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); 1247 1248 LeaveFunction(10); 1249 return NF_STOLEN; 1250 1251 tx_error: 1252 kfree_skb(skb); 1253 LeaveFunction(10); 1254 return NF_STOLEN; 1255 } 1256 #endif 1257 1258 1259 /* 1260 * ICMP packet transmitter 1261 * called by the ip_vs_in_icmp 1262 */ 1263 int 1264 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1265 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1266 struct ip_vs_iphdr *iph) 1267 { 1268 struct rtable *rt; /* Route to the other host */ 1269 int rc; 1270 int local; 1271 int rt_mode, was_input; 1272 1273 EnterFunction(10); 1274 1275 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1276 forwarded directly here, because there is no need to 1277 translate address/port back */ 1278 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1279 if (cp->packet_xmit) 1280 rc = cp->packet_xmit(skb, cp, pp, iph); 1281 else 1282 rc = NF_ACCEPT; 1283 /* do not touch skb anymore */ 1284 atomic_inc(&cp->in_pkts); 1285 goto out; 1286 } 1287 1288 /* 1289 * mangle and send the packet here (only for VS/NAT) 1290 */ 1291 was_input = rt_is_input_route(skb_rtable(skb)); 1292 1293 /* LOCALNODE from FORWARD hook is not supported */ 1294 rt_mode = (hooknum != NF_INET_FORWARD) ? 
1295 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1296 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1297 local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, 1298 NULL, iph); 1299 if (local < 0) 1300 goto tx_error; 1301 rt = skb_rtable(skb); 1302 1303 /* 1304 * Avoid duplicate tuple in reply direction for NAT traffic 1305 * to local address when connection is sync-ed 1306 */ 1307 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1308 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1309 enum ip_conntrack_info ctinfo; 1310 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 1311 1312 if (ct) { 1313 IP_VS_DBG(10, "%s(): " 1314 "stopping DNAT to local address %pI4\n", 1315 __func__, &cp->daddr.ip); 1316 goto tx_error; 1317 } 1318 } 1319 #endif 1320 1321 /* From world but DNAT to loopback address? */ 1322 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { 1323 IP_VS_DBG(1, "%s(): " 1324 "stopping DNAT to loopback %pI4\n", 1325 __func__, &cp->daddr.ip); 1326 goto tx_error; 1327 } 1328 1329 /* copy-on-write the packet before mangling it */ 1330 if (!skb_make_writable(skb, offset)) 1331 goto tx_error; 1332 1333 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1334 goto tx_error; 1335 1336 ip_vs_nat_icmp(skb, pp, cp, 0); 1337 1338 /* Another hack: avoid icmp_send in ip_fragment */ 1339 skb->ignore_df = 1; 1340 1341 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); 1342 goto out; 1343 1344 tx_error: 1345 kfree_skb(skb); 1346 rc = NF_STOLEN; 1347 out: 1348 LeaveFunction(10); 1349 return rc; 1350 } 1351 1352 #ifdef CONFIG_IP_VS_IPV6 1353 int 1354 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1355 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1356 struct ip_vs_iphdr *ipvsh) 1357 { 1358 struct rt6_info *rt; /* Route to the other host */ 1359 int rc; 1360 int local; 1361 int rt_mode; 1362 1363 EnterFunction(10); 1364 1365 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1366 forwarded directly here, because 
there is no need to 1367 translate address/port back */ 1368 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1369 if (cp->packet_xmit) 1370 rc = cp->packet_xmit(skb, cp, pp, ipvsh); 1371 else 1372 rc = NF_ACCEPT; 1373 /* do not touch skb anymore */ 1374 atomic_inc(&cp->in_pkts); 1375 goto out; 1376 } 1377 1378 /* 1379 * mangle and send the packet here (only for VS/NAT) 1380 */ 1381 1382 /* LOCALNODE from FORWARD hook is not supported */ 1383 rt_mode = (hooknum != NF_INET_FORWARD) ? 1384 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1385 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1386 local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, 1387 &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); 1388 if (local < 0) 1389 goto tx_error; 1390 rt = (struct rt6_info *) skb_dst(skb); 1391 /* 1392 * Avoid duplicate tuple in reply direction for NAT traffic 1393 * to local address when connection is sync-ed 1394 */ 1395 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1396 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1397 enum ip_conntrack_info ctinfo; 1398 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 1399 1400 if (ct) { 1401 IP_VS_DBG(10, "%s(): " 1402 "stopping DNAT to local address %pI6\n", 1403 __func__, &cp->daddr.in6); 1404 goto tx_error; 1405 } 1406 } 1407 #endif 1408 1409 /* From world but DNAT to loopback address? 
*/ 1410 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 1411 ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { 1412 IP_VS_DBG(1, "%s(): " 1413 "stopping DNAT to loopback %pI6\n", 1414 __func__, &cp->daddr.in6); 1415 goto tx_error; 1416 } 1417 1418 /* copy-on-write the packet before mangling it */ 1419 if (!skb_make_writable(skb, offset)) 1420 goto tx_error; 1421 1422 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1423 goto tx_error; 1424 1425 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1426 1427 /* Another hack: avoid icmp_send in ip_fragment */ 1428 skb->ignore_df = 1; 1429 1430 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); 1431 goto out; 1432 1433 tx_error: 1434 kfree_skb(skb); 1435 rc = NF_STOLEN; 1436 out: 1437 LeaveFunction(10); 1438 return rc; 1439 } 1440 #endif 1441