1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * IPVS An implementation of the IP virtual server support for the 4 * LINUX operating system. IPVS is now implemented as a module 5 * over the NetFilter framework. IPVS can be used to build a 6 * high-performance and highly available server based on a 7 * cluster of servers. 8 * 9 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 10 * Peter Kese <peter.kese@ijs.si> 11 * Julian Anastasov <ja@ssi.bg> 12 * 13 * Changes: 14 */ 15 16 #define KMSG_COMPONENT "IPVS" 17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 18 19 #include <linux/module.h> 20 #include <linux/init.h> 21 #include <linux/types.h> 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/sysctl.h> 25 #include <linux/proc_fs.h> 26 #include <linux/workqueue.h> 27 #include <linux/swap.h> 28 #include <linux/seq_file.h> 29 #include <linux/slab.h> 30 31 #include <linux/netfilter.h> 32 #include <linux/netfilter_ipv4.h> 33 #include <linux/mutex.h> 34 35 #include <net/net_namespace.h> 36 #include <linux/nsproxy.h> 37 #include <net/ip.h> 38 #ifdef CONFIG_IP_VS_IPV6 39 #include <net/ipv6.h> 40 #include <net/ip6_route.h> 41 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 42 #endif 43 #include <net/route.h> 44 #include <net/sock.h> 45 #include <net/genetlink.h> 46 47 #include <linux/uaccess.h> 48 49 #include <net/ip_vs.h> 50 51 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 52 static DEFINE_MUTEX(__ip_vs_mutex); 53 54 /* sysctl variables */ 55 56 #ifdef CONFIG_IP_VS_DEBUG 57 static int sysctl_ip_vs_debug_level = 0; 58 59 int ip_vs_get_debug_level(void) 60 { 61 return sysctl_ip_vs_debug_level; 62 } 63 #endif 64 65 66 /* Protos */ 67 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); 68 69 70 #ifdef CONFIG_IP_VS_IPV6 71 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? 
*/ 72 static bool __ip_vs_addr_is_local_v6(struct net *net, 73 const struct in6_addr *addr) 74 { 75 struct flowi6 fl6 = { 76 .daddr = *addr, 77 }; 78 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); 79 bool is_local; 80 81 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); 82 83 dst_release(dst); 84 return is_local; 85 } 86 #endif 87 88 #ifdef CONFIG_SYSCTL 89 /* 90 * update_defense_level is called from keventd and from sysctl, 91 * so it needs to protect itself from softirqs 92 */ 93 static void update_defense_level(struct netns_ipvs *ipvs) 94 { 95 struct sysinfo i; 96 int availmem; 97 int nomem; 98 int to_change = -1; 99 100 /* we only count free and buffered memory (in pages) */ 101 si_meminfo(&i); 102 availmem = i.freeram + i.bufferram; 103 /* however in linux 2.5 the i.bufferram is total page cache size, 104 we need adjust it */ 105 /* si_swapinfo(&i); */ 106 /* availmem = availmem - (i.totalswap - i.freeswap); */ 107 108 nomem = (availmem < ipvs->sysctl_amemthresh); 109 110 local_bh_disable(); 111 112 /* drop_entry */ 113 spin_lock(&ipvs->dropentry_lock); 114 switch (ipvs->sysctl_drop_entry) { 115 case 0: 116 atomic_set(&ipvs->dropentry, 0); 117 break; 118 case 1: 119 if (nomem) { 120 atomic_set(&ipvs->dropentry, 1); 121 ipvs->sysctl_drop_entry = 2; 122 } else { 123 atomic_set(&ipvs->dropentry, 0); 124 } 125 break; 126 case 2: 127 if (nomem) { 128 atomic_set(&ipvs->dropentry, 1); 129 } else { 130 atomic_set(&ipvs->dropentry, 0); 131 ipvs->sysctl_drop_entry = 1; 132 } 133 break; 134 case 3: 135 atomic_set(&ipvs->dropentry, 1); 136 break; 137 } 138 spin_unlock(&ipvs->dropentry_lock); 139 140 /* drop_packet */ 141 spin_lock(&ipvs->droppacket_lock); 142 switch (ipvs->sysctl_drop_packet) { 143 case 0: 144 ipvs->drop_rate = 0; 145 break; 146 case 1: 147 if (nomem) { 148 ipvs->drop_rate = ipvs->drop_counter 149 = ipvs->sysctl_amemthresh / 150 (ipvs->sysctl_amemthresh-availmem); 151 ipvs->sysctl_drop_packet = 2; 152 } else { 153 
ipvs->drop_rate = 0; 154 } 155 break; 156 case 2: 157 if (nomem) { 158 ipvs->drop_rate = ipvs->drop_counter 159 = ipvs->sysctl_amemthresh / 160 (ipvs->sysctl_amemthresh-availmem); 161 } else { 162 ipvs->drop_rate = 0; 163 ipvs->sysctl_drop_packet = 1; 164 } 165 break; 166 case 3: 167 ipvs->drop_rate = ipvs->sysctl_am_droprate; 168 break; 169 } 170 spin_unlock(&ipvs->droppacket_lock); 171 172 /* secure_tcp */ 173 spin_lock(&ipvs->securetcp_lock); 174 switch (ipvs->sysctl_secure_tcp) { 175 case 0: 176 if (ipvs->old_secure_tcp >= 2) 177 to_change = 0; 178 break; 179 case 1: 180 if (nomem) { 181 if (ipvs->old_secure_tcp < 2) 182 to_change = 1; 183 ipvs->sysctl_secure_tcp = 2; 184 } else { 185 if (ipvs->old_secure_tcp >= 2) 186 to_change = 0; 187 } 188 break; 189 case 2: 190 if (nomem) { 191 if (ipvs->old_secure_tcp < 2) 192 to_change = 1; 193 } else { 194 if (ipvs->old_secure_tcp >= 2) 195 to_change = 0; 196 ipvs->sysctl_secure_tcp = 1; 197 } 198 break; 199 case 3: 200 if (ipvs->old_secure_tcp < 2) 201 to_change = 1; 202 break; 203 } 204 ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; 205 if (to_change >= 0) 206 ip_vs_protocol_timeout_change(ipvs, 207 ipvs->sysctl_secure_tcp > 1); 208 spin_unlock(&ipvs->securetcp_lock); 209 210 local_bh_enable(); 211 } 212 213 /* Handler for delayed work for expiring no 214 * destination connections 215 */ 216 static void expire_nodest_conn_handler(struct work_struct *work) 217 { 218 struct netns_ipvs *ipvs; 219 220 ipvs = container_of(work, struct netns_ipvs, 221 expire_nodest_conn_work.work); 222 ip_vs_expire_nodest_conn_flush(ipvs); 223 } 224 225 /* 226 * Timer for checking the defense 227 */ 228 #define DEFENSE_TIMER_PERIOD 1*HZ 229 230 static void defense_work_handler(struct work_struct *work) 231 { 232 struct netns_ipvs *ipvs = 233 container_of(work, struct netns_ipvs, defense_work.work); 234 235 update_defense_level(ipvs); 236 if (atomic_read(&ipvs->dropentry)) 237 ip_vs_random_dropentry(ipvs); 238 
queue_delayed_work(system_long_wq, &ipvs->defense_work, 239 DEFENSE_TIMER_PERIOD); 240 } 241 #endif 242 243 int 244 ip_vs_use_count_inc(void) 245 { 246 return try_module_get(THIS_MODULE); 247 } 248 249 void 250 ip_vs_use_count_dec(void) 251 { 252 module_put(THIS_MODULE); 253 } 254 255 256 /* 257 * Hash table: for virtual service lookups 258 */ 259 #define IP_VS_SVC_TAB_BITS 8 260 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 261 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 262 263 /* the service table hashed by <protocol, addr, port> */ 264 static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 265 /* the service table hashed by fwmark */ 266 static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 267 268 269 /* 270 * Returns hash value for virtual service 271 */ 272 static inline unsigned int 273 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, 274 const union nf_inet_addr *addr, __be16 port) 275 { 276 unsigned int porth = ntohs(port); 277 __be32 addr_fold = addr->ip; 278 __u32 ahash; 279 280 #ifdef CONFIG_IP_VS_IPV6 281 if (af == AF_INET6) 282 addr_fold = addr->ip6[0]^addr->ip6[1]^ 283 addr->ip6[2]^addr->ip6[3]; 284 #endif 285 ahash = ntohl(addr_fold); 286 ahash ^= ((size_t) ipvs >> 8); 287 288 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & 289 IP_VS_SVC_TAB_MASK; 290 } 291 292 /* 293 * Returns hash value of fwmark for virtual service lookup 294 */ 295 static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) 296 { 297 return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 298 } 299 300 /* 301 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 302 * or in the ip_vs_svc_fwm_table by fwmark. 303 * Should be called with locked tables. 
304 */ 305 static int ip_vs_svc_hash(struct ip_vs_service *svc) 306 { 307 unsigned int hash; 308 309 if (svc->flags & IP_VS_SVC_F_HASHED) { 310 pr_err("%s(): request for already hashed, called from %pS\n", 311 __func__, __builtin_return_address(0)); 312 return 0; 313 } 314 315 if (svc->fwmark == 0) { 316 /* 317 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 318 */ 319 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, 320 &svc->addr, svc->port); 321 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); 322 } else { 323 /* 324 * Hash it by fwmark in svc_fwm_table 325 */ 326 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); 327 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 328 } 329 330 svc->flags |= IP_VS_SVC_F_HASHED; 331 /* increase its refcnt because it is referenced by the svc table */ 332 atomic_inc(&svc->refcnt); 333 return 1; 334 } 335 336 337 /* 338 * Unhashes a service from svc_table / svc_fwm_table. 339 * Should be called with locked tables. 340 */ 341 static int ip_vs_svc_unhash(struct ip_vs_service *svc) 342 { 343 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 344 pr_err("%s(): request for unhash flagged, called from %pS\n", 345 __func__, __builtin_return_address(0)); 346 return 0; 347 } 348 349 if (svc->fwmark == 0) { 350 /* Remove it from the svc_table table */ 351 hlist_del_rcu(&svc->s_list); 352 } else { 353 /* Remove it from the svc_fwm_table table */ 354 hlist_del_rcu(&svc->f_list); 355 } 356 357 svc->flags &= ~IP_VS_SVC_F_HASHED; 358 atomic_dec(&svc->refcnt); 359 return 1; 360 } 361 362 363 /* 364 * Get service by {netns, proto,addr,port} in the service table. 
365 */ 366 static inline struct ip_vs_service * 367 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, 368 const union nf_inet_addr *vaddr, __be16 vport) 369 { 370 unsigned int hash; 371 struct ip_vs_service *svc; 372 373 /* Check for "full" addressed entries */ 374 hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); 375 376 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { 377 if ((svc->af == af) 378 && ip_vs_addr_equal(af, &svc->addr, vaddr) 379 && (svc->port == vport) 380 && (svc->protocol == protocol) 381 && (svc->ipvs == ipvs)) { 382 /* HIT */ 383 return svc; 384 } 385 } 386 387 return NULL; 388 } 389 390 391 /* 392 * Get service by {fwmark} in the service table. 393 */ 394 static inline struct ip_vs_service * 395 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) 396 { 397 unsigned int hash; 398 struct ip_vs_service *svc; 399 400 /* Check for fwmark addressed entries */ 401 hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); 402 403 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { 404 if (svc->fwmark == fwmark && svc->af == af 405 && (svc->ipvs == ipvs)) { 406 /* HIT */ 407 return svc; 408 } 409 } 410 411 return NULL; 412 } 413 414 /* Find service, called under RCU lock */ 415 struct ip_vs_service * 416 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, 417 const union nf_inet_addr *vaddr, __be16 vport) 418 { 419 struct ip_vs_service *svc; 420 421 /* 422 * Check the table hashed by fwmark first 423 */ 424 if (fwmark) { 425 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); 426 if (svc) 427 goto out; 428 } 429 430 /* 431 * Check the table hashed by <protocol,addr,port> 432 * for "full" addressed entries 433 */ 434 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); 435 436 if (!svc && protocol == IPPROTO_TCP && 437 atomic_read(&ipvs->ftpsvc_counter) && 438 (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) { 439 /* 440 * Check 
if ftp service entry exists, the packet 441 * might belong to FTP data connections. 442 */ 443 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); 444 } 445 446 if (svc == NULL 447 && atomic_read(&ipvs->nullsvc_counter)) { 448 /* 449 * Check if the catch-all port (port zero) exists 450 */ 451 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); 452 } 453 454 out: 455 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 456 fwmark, ip_vs_proto_name(protocol), 457 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 458 svc ? "hit" : "not hit"); 459 460 return svc; 461 } 462 463 464 static inline void 465 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 466 { 467 atomic_inc(&svc->refcnt); 468 rcu_assign_pointer(dest->svc, svc); 469 } 470 471 static void ip_vs_service_free(struct ip_vs_service *svc) 472 { 473 free_percpu(svc->stats.cpustats); 474 kfree(svc); 475 } 476 477 static void ip_vs_service_rcu_free(struct rcu_head *head) 478 { 479 struct ip_vs_service *svc; 480 481 svc = container_of(head, struct ip_vs_service, rcu_head); 482 ip_vs_service_free(svc); 483 } 484 485 static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) 486 { 487 if (atomic_dec_and_test(&svc->refcnt)) { 488 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 489 svc->fwmark, 490 IP_VS_DBG_ADDR(svc->af, &svc->addr), 491 ntohs(svc->port)); 492 if (do_delay) 493 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); 494 else 495 ip_vs_service_free(svc); 496 } 497 } 498 499 500 /* 501 * Returns hash value for real service 502 */ 503 static inline unsigned int ip_vs_rs_hashkey(int af, 504 const union nf_inet_addr *addr, 505 __be16 port) 506 { 507 unsigned int porth = ntohs(port); 508 __be32 addr_fold = addr->ip; 509 510 #ifdef CONFIG_IP_VS_IPV6 511 if (af == AF_INET6) 512 addr_fold = addr->ip6[0]^addr->ip6[1]^ 513 addr->ip6[2]^addr->ip6[3]; 514 #endif 515 516 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) 517 & IP_VS_RTAB_MASK; 518 } 519 520 /* Hash 
ip_vs_dest in rs_table by <proto,addr,port>. */ 521 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 522 { 523 unsigned int hash; 524 __be16 port; 525 526 if (dest->in_rs_table) 527 return; 528 529 switch (IP_VS_DFWD_METHOD(dest)) { 530 case IP_VS_CONN_F_MASQ: 531 port = dest->port; 532 break; 533 case IP_VS_CONN_F_TUNNEL: 534 switch (dest->tun_type) { 535 case IP_VS_CONN_F_TUNNEL_TYPE_GUE: 536 port = dest->tun_port; 537 break; 538 case IP_VS_CONN_F_TUNNEL_TYPE_IPIP: 539 case IP_VS_CONN_F_TUNNEL_TYPE_GRE: 540 port = 0; 541 break; 542 default: 543 return; 544 } 545 break; 546 default: 547 return; 548 } 549 550 /* 551 * Hash by proto,addr,port, 552 * which are the parameters of the real service. 553 */ 554 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port); 555 556 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); 557 dest->in_rs_table = 1; 558 } 559 560 /* Unhash ip_vs_dest from rs_table. */ 561 static void ip_vs_rs_unhash(struct ip_vs_dest *dest) 562 { 563 /* 564 * Remove it from the rs_table table. 565 */ 566 if (dest->in_rs_table) { 567 hlist_del_rcu(&dest->d_list); 568 dest->in_rs_table = 0; 569 } 570 } 571 572 /* Check if real service by <proto,addr,port> is present */ 573 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, 574 const union nf_inet_addr *daddr, __be16 dport) 575 { 576 unsigned int hash; 577 struct ip_vs_dest *dest; 578 579 /* Check for "full" addressed entries */ 580 hash = ip_vs_rs_hashkey(af, daddr, dport); 581 582 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 583 if (dest->port == dport && 584 dest->af == af && 585 ip_vs_addr_equal(af, &dest->addr, daddr) && 586 (dest->protocol == protocol || dest->vfwmark) && 587 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 588 /* HIT */ 589 return true; 590 } 591 } 592 593 return false; 594 } 595 596 /* Find real service record by <proto,addr,port>. 
597 * In case of multiple records with the same <proto,addr,port>, only 598 * the first found record is returned. 599 * 600 * To be called under RCU lock. 601 */ 602 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, 603 __u16 protocol, 604 const union nf_inet_addr *daddr, 605 __be16 dport) 606 { 607 unsigned int hash; 608 struct ip_vs_dest *dest; 609 610 /* Check for "full" addressed entries */ 611 hash = ip_vs_rs_hashkey(af, daddr, dport); 612 613 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 614 if (dest->port == dport && 615 dest->af == af && 616 ip_vs_addr_equal(af, &dest->addr, daddr) && 617 (dest->protocol == protocol || dest->vfwmark) && 618 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 619 /* HIT */ 620 return dest; 621 } 622 } 623 624 return NULL; 625 } 626 627 /* Find real service record by <af,addr,tun_port>. 628 * In case of multiple records with the same <af,addr,tun_port>, only 629 * the first found record is returned. 630 * 631 * To be called under RCU lock. 632 */ 633 struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af, 634 const union nf_inet_addr *daddr, 635 __be16 tun_port) 636 { 637 struct ip_vs_dest *dest; 638 unsigned int hash; 639 640 /* Check for "full" addressed entries */ 641 hash = ip_vs_rs_hashkey(af, daddr, tun_port); 642 643 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 644 if (dest->tun_port == tun_port && 645 dest->af == af && 646 ip_vs_addr_equal(af, &dest->addr, daddr) && 647 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) { 648 /* HIT */ 649 return dest; 650 } 651 } 652 653 return NULL; 654 } 655 656 /* Lookup destination by {addr,port} in the given service 657 * Called under RCU lock. 
658 */ 659 static struct ip_vs_dest * 660 ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, 661 const union nf_inet_addr *daddr, __be16 dport) 662 { 663 struct ip_vs_dest *dest; 664 665 /* 666 * Find the destination for the given service 667 */ 668 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 669 if ((dest->af == dest_af) && 670 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 671 (dest->port == dport)) { 672 /* HIT */ 673 return dest; 674 } 675 } 676 677 return NULL; 678 } 679 680 /* 681 * Find destination by {daddr,dport,vaddr,protocol} 682 * Created to be used in ip_vs_process_message() in 683 * the backup synchronization daemon. It finds the 684 * destination to be bound to the received connection 685 * on the backup. 686 * Called under RCU lock, no refcnt is returned. 687 */ 688 struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af, 689 const union nf_inet_addr *daddr, 690 __be16 dport, 691 const union nf_inet_addr *vaddr, 692 __be16 vport, __u16 protocol, __u32 fwmark, 693 __u32 flags) 694 { 695 struct ip_vs_dest *dest; 696 struct ip_vs_service *svc; 697 __be16 port = dport; 698 699 svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport); 700 if (!svc) 701 return NULL; 702 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 703 port = 0; 704 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port); 705 if (!dest) 706 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport); 707 return dest; 708 } 709 710 void ip_vs_dest_dst_rcu_free(struct rcu_head *head) 711 { 712 struct ip_vs_dest_dst *dest_dst = container_of(head, 713 struct ip_vs_dest_dst, 714 rcu_head); 715 716 dst_release(dest_dst->dst_cache); 717 kfree(dest_dst); 718 } 719 720 /* Release dest_dst and dst_cache for dest in user context */ 721 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) 722 { 723 struct ip_vs_dest_dst *old; 724 725 old = rcu_dereference_protected(dest->dest_dst, 1); 726 if (old) { 727 
RCU_INIT_POINTER(dest->dest_dst, NULL); 728 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 729 } 730 } 731 732 /* 733 * Lookup dest by {svc,addr,port} in the destination trash. 734 * The destination trash is used to hold the destinations that are removed 735 * from the service table but are still referenced by some conn entries. 736 * The reason to add the destination trash is when the dest is temporary 737 * down (either by administrator or by monitor program), the dest can be 738 * picked back from the trash, the remaining connections to the dest can 739 * continue, and the counting information of the dest is also useful for 740 * scheduling. 741 */ 742 static struct ip_vs_dest * 743 ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, 744 const union nf_inet_addr *daddr, __be16 dport) 745 { 746 struct ip_vs_dest *dest; 747 struct netns_ipvs *ipvs = svc->ipvs; 748 749 /* 750 * Find the destination in trash 751 */ 752 spin_lock_bh(&ipvs->dest_trash_lock); 753 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 754 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 755 "dest->refcnt=%d\n", 756 dest->vfwmark, 757 IP_VS_DBG_ADDR(dest->af, &dest->addr), 758 ntohs(dest->port), 759 refcount_read(&dest->refcnt)); 760 if (dest->af == dest_af && 761 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 762 dest->port == dport && 763 dest->vfwmark == svc->fwmark && 764 dest->protocol == svc->protocol && 765 (svc->fwmark || 766 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 767 dest->vport == svc->port))) { 768 /* HIT */ 769 list_del(&dest->t_list); 770 goto out; 771 } 772 } 773 774 dest = NULL; 775 776 out: 777 spin_unlock_bh(&ipvs->dest_trash_lock); 778 779 return dest; 780 } 781 782 static void ip_vs_dest_free(struct ip_vs_dest *dest) 783 { 784 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); 785 786 __ip_vs_dst_cache_reset(dest); 787 __ip_vs_svc_put(svc, false); 788 free_percpu(dest->stats.cpustats); 789 
ip_vs_dest_put_and_free(dest); 790 } 791 792 /* 793 * Clean up all the destinations in the trash 794 * Called by the ip_vs_control_cleanup() 795 * 796 * When the ip_vs_control_clearup is activated by ipvs module exit, 797 * the service tables must have been flushed and all the connections 798 * are expired, and the refcnt of each destination in the trash must 799 * be 1, so we simply release them here. 800 */ 801 static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) 802 { 803 struct ip_vs_dest *dest, *nxt; 804 805 del_timer_sync(&ipvs->dest_trash_timer); 806 /* No need to use dest_trash_lock */ 807 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { 808 list_del(&dest->t_list); 809 ip_vs_dest_free(dest); 810 } 811 } 812 813 static void 814 ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) 815 { 816 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c 817 818 spin_lock_bh(&src->lock); 819 820 IP_VS_SHOW_STATS_COUNTER(conns); 821 IP_VS_SHOW_STATS_COUNTER(inpkts); 822 IP_VS_SHOW_STATS_COUNTER(outpkts); 823 IP_VS_SHOW_STATS_COUNTER(inbytes); 824 IP_VS_SHOW_STATS_COUNTER(outbytes); 825 826 ip_vs_read_estimator(dst, src); 827 828 spin_unlock_bh(&src->lock); 829 } 830 831 static void 832 ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) 833 { 834 dst->conns = (u32)src->conns; 835 dst->inpkts = (u32)src->inpkts; 836 dst->outpkts = (u32)src->outpkts; 837 dst->inbytes = src->inbytes; 838 dst->outbytes = src->outbytes; 839 dst->cps = (u32)src->cps; 840 dst->inpps = (u32)src->inpps; 841 dst->outpps = (u32)src->outpps; 842 dst->inbps = (u32)src->inbps; 843 dst->outbps = (u32)src->outbps; 844 } 845 846 static void 847 ip_vs_zero_stats(struct ip_vs_stats *stats) 848 { 849 spin_lock_bh(&stats->lock); 850 851 /* get current counters as zero point, rates are zeroed */ 852 853 #define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c 854 855 IP_VS_ZERO_STATS_COUNTER(conns); 856 
IP_VS_ZERO_STATS_COUNTER(inpkts); 857 IP_VS_ZERO_STATS_COUNTER(outpkts); 858 IP_VS_ZERO_STATS_COUNTER(inbytes); 859 IP_VS_ZERO_STATS_COUNTER(outbytes); 860 861 ip_vs_zero_estimator(stats); 862 863 spin_unlock_bh(&stats->lock); 864 } 865 866 /* 867 * Update a destination in the given service 868 */ 869 static void 870 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 871 struct ip_vs_dest_user_kern *udest, int add) 872 { 873 struct netns_ipvs *ipvs = svc->ipvs; 874 struct ip_vs_service *old_svc; 875 struct ip_vs_scheduler *sched; 876 int conn_flags; 877 878 /* We cannot modify an address and change the address family */ 879 BUG_ON(!add && udest->af != dest->af); 880 881 if (add && udest->af != svc->af) 882 ipvs->mixed_address_family_dests++; 883 884 /* keep the last_weight with latest non-0 weight */ 885 if (add || udest->weight != 0) 886 atomic_set(&dest->last_weight, udest->weight); 887 888 /* set the weight and the flags */ 889 atomic_set(&dest->weight, udest->weight); 890 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 891 conn_flags |= IP_VS_CONN_F_INACTIVE; 892 893 /* Need to rehash? */ 894 if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) != 895 IP_VS_DFWD_METHOD(dest) || 896 udest->tun_type != dest->tun_type || 897 udest->tun_port != dest->tun_port) 898 ip_vs_rs_unhash(dest); 899 900 /* set the tunnel info */ 901 dest->tun_type = udest->tun_type; 902 dest->tun_port = udest->tun_port; 903 dest->tun_flags = udest->tun_flags; 904 905 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 906 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 907 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 908 } else { 909 /* FTP-NAT requires conntrack for mangling */ 910 if (svc->port == FTPPORT) 911 ip_vs_register_conntrack(svc); 912 } 913 atomic_set(&dest->conn_flags, conn_flags); 914 /* Put the real service in rs_table if not present. 
*/ 915 ip_vs_rs_hash(ipvs, dest); 916 917 /* bind the service */ 918 old_svc = rcu_dereference_protected(dest->svc, 1); 919 if (!old_svc) { 920 __ip_vs_bind_svc(dest, svc); 921 } else { 922 if (old_svc != svc) { 923 ip_vs_zero_stats(&dest->stats); 924 __ip_vs_bind_svc(dest, svc); 925 __ip_vs_svc_put(old_svc, true); 926 } 927 } 928 929 /* set the dest status flags */ 930 dest->flags |= IP_VS_DEST_F_AVAILABLE; 931 932 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) 933 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 934 dest->u_threshold = udest->u_threshold; 935 dest->l_threshold = udest->l_threshold; 936 937 dest->af = udest->af; 938 939 spin_lock_bh(&dest->dst_lock); 940 __ip_vs_dst_cache_reset(dest); 941 spin_unlock_bh(&dest->dst_lock); 942 943 if (add) { 944 ip_vs_start_estimator(svc->ipvs, &dest->stats); 945 list_add_rcu(&dest->n_list, &svc->destinations); 946 svc->num_dests++; 947 sched = rcu_dereference_protected(svc->scheduler, 1); 948 if (sched && sched->add_dest) 949 sched->add_dest(svc, dest); 950 } else { 951 sched = rcu_dereference_protected(svc->scheduler, 1); 952 if (sched && sched->upd_dest) 953 sched->upd_dest(svc, dest); 954 } 955 } 956 957 958 /* 959 * Create a destination for the given service 960 */ 961 static int 962 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, 963 struct ip_vs_dest **dest_p) 964 { 965 struct ip_vs_dest *dest; 966 unsigned int atype, i; 967 968 EnterFunction(2); 969 970 #ifdef CONFIG_IP_VS_IPV6 971 if (udest->af == AF_INET6) { 972 int ret; 973 974 atype = ipv6_addr_type(&udest->addr.in6); 975 if ((!(atype & IPV6_ADDR_UNICAST) || 976 atype & IPV6_ADDR_LINKLOCAL) && 977 !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) 978 return -EINVAL; 979 980 ret = nf_defrag_ipv6_enable(svc->ipvs->net); 981 if (ret) 982 return ret; 983 } else 984 #endif 985 { 986 atype = inet_addr_type(svc->ipvs->net, udest->addr.ip); 987 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 988 return 
-EINVAL; 989 } 990 991 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); 992 if (dest == NULL) 993 return -ENOMEM; 994 995 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 996 if (!dest->stats.cpustats) 997 goto err_alloc; 998 999 for_each_possible_cpu(i) { 1000 struct ip_vs_cpu_stats *ip_vs_dest_stats; 1001 ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); 1002 u64_stats_init(&ip_vs_dest_stats->syncp); 1003 } 1004 1005 dest->af = udest->af; 1006 dest->protocol = svc->protocol; 1007 dest->vaddr = svc->addr; 1008 dest->vport = svc->port; 1009 dest->vfwmark = svc->fwmark; 1010 ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr); 1011 dest->port = udest->port; 1012 1013 atomic_set(&dest->activeconns, 0); 1014 atomic_set(&dest->inactconns, 0); 1015 atomic_set(&dest->persistconns, 0); 1016 refcount_set(&dest->refcnt, 1); 1017 1018 INIT_HLIST_NODE(&dest->d_list); 1019 spin_lock_init(&dest->dst_lock); 1020 spin_lock_init(&dest->stats.lock); 1021 __ip_vs_update_dest(svc, dest, udest, 1); 1022 1023 *dest_p = dest; 1024 1025 LeaveFunction(2); 1026 return 0; 1027 1028 err_alloc: 1029 kfree(dest); 1030 return -ENOMEM; 1031 } 1032 1033 1034 /* 1035 * Add a destination into an existing service 1036 */ 1037 static int 1038 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1039 { 1040 struct ip_vs_dest *dest; 1041 union nf_inet_addr daddr; 1042 __be16 dport = udest->port; 1043 int ret; 1044 1045 EnterFunction(2); 1046 1047 if (udest->weight < 0) { 1048 pr_err("%s(): server weight less than zero\n", __func__); 1049 return -ERANGE; 1050 } 1051 1052 if (udest->l_threshold > udest->u_threshold) { 1053 pr_err("%s(): lower threshold is higher than upper threshold\n", 1054 __func__); 1055 return -ERANGE; 1056 } 1057 1058 if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { 1059 if (udest->tun_port == 0) { 1060 pr_err("%s(): tunnel port is zero\n", __func__); 1061 return -EINVAL; 1062 } 1063 } 1064 1065 ip_vs_addr_copy(udest->af, 
&daddr, &udest->addr);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
	rcu_read_unlock();

	if (dest != NULL) {
		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
			      "dest->refcnt=%d, service %u/%s:%u\n",
			      IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
			      refcount_read(&dest->refcnt),
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
			      ntohs(dest->vport));

		/* Revive the trashed dest and re-link it into this service */
		__ip_vs_update_dest(svc, dest, udest, 1);
		ret = 0;
	} else {
		/*
		 * Allocate and initialize the dest structure
		 */
		ret = ip_vs_new_dest(svc, udest, &dest);
	}
	LeaveFunction(2);

	return ret;
}


/*
 *	Edit a destination in the given service.
 *	Validates the user-supplied parameters, looks the dest up by
 *	<af, addr, port> and applies the update in place.
 */
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;

	EnterFunction(2);

	/* Reject invalid user input before touching any state */
	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	/* GUE tunnels need an explicit tunnel port */
	if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
		if (udest->tun_port == 0) {
			pr_err("%s(): tunnel port is zero\n", __func__);
			return -EINVAL;
		}
	}

	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
	rcu_read_unlock();

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, udest, 0);
	LeaveFunction(2);

	return 0;
}

/*
 *	Delete a destination (must be already unlinked from the service).
 *	The dest is not freed here: it is parked in the per-netns trash list
 *	with its reference held, to be reaped later by the trash timer once
 *	its refcnt drops.
 */
static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
			     bool cleanup)
{
	ip_vs_stop_estimator(ipvs, &dest->stats);

	/*
	 *	Remove it from the d-linked list with the real services.
	 */
	ip_vs_rs_unhash(dest);

	spin_lock_bh(&ipvs->dest_trash_lock);
	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
		      refcount_read(&dest->refcnt));
	/* Arm the reaper timer only on the empty->non-empty transition;
	 * during netns cleanup no timer is needed.
	 */
	if (list_empty(&ipvs->dest_trash) && !cleanup)
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	/* dest lives in trash with reference */
	list_add(&dest->t_list, &ipvs->dest_trash);
	dest->idle_start = 0;
	spin_unlock_bh(&ipvs->dest_trash_lock);

	/* Queue up delayed work to expire all no destination connections.
	 * No-op when CONFIG_SYSCTL is disabled.
	 */
	if (!cleanup)
		ip_vs_enqueue_expire_nodest_conns(ipvs);
}


/*
 *	Unlink a destination from the given service
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 *	Remove it from the d-linked destination list.
	 */
	list_del_rcu(&dest->n_list);
	svc->num_dests--;

	if (dest->af != svc->af)
		svc->ipvs->mixed_address_family_dests--;

	/* Let the scheduler drop any per-dest state it keeps */
	if (svcupd) {
		struct ip_vs_scheduler *sched;

		sched = rcu_dereference_protected(svc->scheduler, 1);
		if (sched && sched->del_dest)
			sched->del_dest(svc, dest);
	}
}


/*
 *	Delete a destination server in the given service
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	__be16 dport = udest->port;

	EnterFunction(2);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
	rcu_read_unlock();

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	/*
	 *	Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	/*
	 *	Delete the destination
	 */
	__ip_vs_del_dest(svc->ipvs, dest, false);

	LeaveFunction(2);

	return 0;
}

/*
 *	Timer callback: reap trashed destinations. A dest is freed only
 *	after its refcnt has dropped to 1 (trash holds the last ref) and
 *	it has then stayed idle for a full IP_VS_DEST_TRASH_PERIOD.
 */
static void ip_vs_dest_trash_expire(struct timer_list *t)
{
	struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
	struct ip_vs_dest *dest, *next;
	unsigned long now = jiffies;

	spin_lock(&ipvs->dest_trash_lock);
	list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
		if (refcount_read(&dest->refcnt) > 1)
			continue;
		if (dest->idle_start) {
			/* grace period not over yet */
			if (time_before(now, dest->idle_start +
					     IP_VS_DEST_TRASH_PERIOD))
				continue;
		} else {
			/* First time seen idle: stamp the start of the
			 * grace period; 0 means "not idle", so clamp to 1.
			 */
			dest->idle_start = max(1UL, now);
			continue;
		}
		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port));
		list_del(&dest->t_list);
		ip_vs_dest_free(dest);
	}
	if
(!list_empty(&ipvs->dest_trash))
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	spin_unlock(&ipvs->dest_trash_lock);
}

/*
 *	Add a service into the service hash table
 */
static int
ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
		  struct ip_vs_service **svc_p)
{
	int ret = 0, i;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_pe *pe = NULL;
	struct ip_vs_service *svc = NULL;
	int ret_hooks = -1;

	/* increase the module use count */
	if (!ip_vs_use_count_inc())
		return -ENOPROTOOPT;

	/* Lookup the scheduler by 'u->sched_name' ("none" = no scheduler) */
	if (strcmp(u->sched_name, "none")) {
		sched = ip_vs_scheduler_get(u->sched_name);
		if (!sched) {
			pr_info("Scheduler module ip_vs_%s not found\n",
				u->sched_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6) {
		/* for IPv6 the netmask field carries a prefix length */
		__u32 plen = (__force __u32) u->netmask;

		if (plen < 1 || plen > 128) {
			ret = -EINVAL;
			goto out_err;
		}

		ret = nf_defrag_ipv6_enable(ipvs->net);
		if (ret)
			goto out_err;
	}
#endif

	/* Register the netfilter hooks on the first service per family */
	if ((u->af == AF_INET && !ipvs->num_services) ||
	    (u->af == AF_INET6 && !ipvs->num_services6)) {
		ret = ip_vs_register_hooks(ipvs, u->af);
		if (ret < 0)
			goto out_err;
		ret_hooks = ret;
	}

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
	if (svc == NULL) {
		IP_VS_DBG(1, "%s(): no memory\n", __func__);
		ret = -ENOMEM;
		goto out_err;
	}
	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!svc->stats.cpustats) {
		ret = -ENOMEM;
		goto out_err;
	}

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *ip_vs_stats;
		ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
		u64_stats_init(&ip_vs_stats->syncp);
	}


	/* I'm the first user of the service */
	atomic_set(&svc->refcnt, 0);

	svc->af = u->af;
	svc->protocol = u->protocol;
	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
	svc->port = u->port;
	svc->fwmark = u->fwmark;
	svc->flags = u->flags;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;
	svc->ipvs = ipvs;

	INIT_LIST_HEAD(&svc->destinations);
	spin_lock_init(&svc->sched_lock);
	spin_lock_init(&svc->stats.lock);

	/* Bind the scheduler */
	if (sched) {
		ret = ip_vs_bind_scheduler(svc, sched);
		if (ret)
			goto out_err;
		sched = NULL;	/* reference now owned by svc */
	}

	/* Bind the ct retriever */
	RCU_INIT_POINTER(svc->pe, pe);
	pe = NULL;	/* reference now owned by svc */

	/* Update the virtual service counters */
	if (svc->port == FTPPORT)
		atomic_inc(&ipvs->ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ipvs->nullsvc_counter);
	if (svc->pe && svc->pe->conn_out)
		atomic_inc(&ipvs->conn_out_counter);

	ip_vs_start_estimator(ipvs, &svc->stats);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ipvs->num_services++;
	else if (svc->af == AF_INET6)
		ipvs->num_services6++;

	/* Hash the service into the service table */
	ip_vs_svc_hash(svc);

	*svc_p = svc;
	/* Now there is a service - full throttle */
	ipvs->enable = 1;
	return 0;


 out_err:
	/* Undo everything acquired so far, in reverse order */
	if (ret_hooks >= 0)
		ip_vs_unregister_hooks(ipvs, u->af);
	if (svc != NULL) {
		ip_vs_unbind_scheduler(svc, sched);
		ip_vs_service_free(svc);
	}
	ip_vs_scheduler_put(sched);
	ip_vs_pe_put(pe);

	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}


/*
 *	Edit a service and bind it with a new scheduler
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched = NULL, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;
	bool new_pe_conn_out, old_pe_conn_out;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	if (strcmp(u->sched_name, "none")) {
		sched = ip_vs_scheduler_get(u->sched_name);
		if (!sched) {
			pr_info("Scheduler module ip_vs_%s not found\n",
				u->sched_name);
			return -ENOENT;
		}
	}
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6) {
		/* for IPv6 the netmask field carries a prefix length */
		__u32 plen = (__force __u32) u->netmask;

		if (plen < 1 || plen > 128) {
			ret = -EINVAL;
			goto out;
		}
	}
#endif

	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	if (sched != old_sched) {
		if (old_sched) {
			ip_vs_unbind_scheduler(svc, old_sched);
			RCU_INIT_POINTER(svc->scheduler, NULL);
			/* Wait all svc->sched_data users */
			synchronize_rcu();
		}
		/* Bind the new scheduler */
		if (sched) {
			ret = ip_vs_bind_scheduler(svc, sched);
			if (ret) {
				ip_vs_scheduler_put(sched);
				goto out;
			}
		}
	}

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_pe = rcu_dereference_protected(svc->pe, 1);
	if (pe != old_pe) {
		rcu_assign_pointer(svc->pe, pe);
		/* check for optional methods in
new pe */
		new_pe_conn_out = (pe && pe->conn_out) ? true : false;
		old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
		/* keep conn_out_counter in sync with pe->conn_out presence */
		if (new_pe_conn_out && !old_pe_conn_out)
			atomic_inc(&svc->ipvs->conn_out_counter);
		if (old_pe_conn_out && !new_pe_conn_out)
			atomic_dec(&svc->ipvs->conn_out_counter);
	}

out:
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}

/*
 *	Delete a service from the service list
 *	- The service must be unlinked, unlocked and not referenced!
 *	- We are called under _bh lock
 */
static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
{
	struct ip_vs_dest *dest, *nxt;
	struct ip_vs_scheduler *old_sched;
	struct ip_vs_pe *old_pe;
	struct netns_ipvs *ipvs = svc->ipvs;

	/* Drop the netfilter hooks when the last service of a family goes */
	if (svc->af == AF_INET) {
		ipvs->num_services--;
		if (!ipvs->num_services)
			ip_vs_unregister_hooks(ipvs, svc->af);
	} else if (svc->af == AF_INET6) {
		ipvs->num_services6--;
		if (!ipvs->num_services6)
			ip_vs_unregister_hooks(ipvs, svc->af);
	}

	ip_vs_stop_estimator(svc->ipvs, &svc->stats);

	/* Unbind scheduler */
	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	ip_vs_unbind_scheduler(svc, old_sched);
	ip_vs_scheduler_put(old_sched);

	/* Unbind persistence engine, keep svc->pe */
	old_pe = rcu_dereference_protected(svc->pe, 1);
	if (old_pe && old_pe->conn_out)
		atomic_dec(&ipvs->conn_out_counter);
	ip_vs_pe_put(old_pe);

	/*
	 *    Unlink the whole destination list
	 */
	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(svc->ipvs, dest, cleanup);
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ipvs->ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ipvs->nullsvc_counter);

	/*
	 *    Free the service if nobody refers to it
	 */
	__ip_vs_svc_put(svc, true);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}

/*
 *	Unlink a service from list and try to delete it if its refcnt reached 0
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{
	ip_vs_unregister_conntrack(svc);
	/* Hold svc to avoid double release from dest_trash */
	atomic_inc(&svc->refcnt);
	/*
	 * Unhash it from the service table
	 */
	ip_vs_svc_unhash(svc);

	__ip_vs_del_service(svc, cleanup);
}

/*
 *	Delete a service from the service list
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	/* NOTE(review): -EEXIST for a missing service is the historical
	 * errno here; userspace may depend on it, so do not change.
	 */
	if (svc == NULL)
		return -EEXIST;
	ip_vs_unlink_service(svc, false);

	return 0;
}


/*
 *	Flush all the virtual services
 */
static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
{
	int idx;
	struct ip_vs_service *svc;
	struct hlist_node *n;

	/*
	 * Flush the service table hashed by <netns,protocol,addr,port>
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
					  s_list) {
			if (svc->ipvs == ipvs)
				ip_vs_unlink_service(svc, cleanup);
		}
	}

	/*
	 * Flush the service table hashed by fwmark
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
					  f_list) {
			if (svc->ipvs == ipvs)
				ip_vs_unlink_service(svc, cleanup);
		}
	}

	return 0;
}

/*
 *	Delete service by {netns} in the service table.
 *	Called by __ip_vs_batch_cleanup()
 */
void ip_vs_service_nets_cleanup(struct list_head *net_list)
{
	struct netns_ipvs *ipvs;
	struct net *net;

	EnterFunction(2);
	/* Check for "full" addressed entries */
	mutex_lock(&__ip_vs_mutex);
	list_for_each_entry(net, net_list, exit_list) {
		ipvs = net_ipvs(net);
		ip_vs_flush(ipvs, true);
	}
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
}

/* Put all references for device (dst_cache) */
static inline void
ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
{
	struct ip_vs_dest_dst *dest_dst;

	spin_lock_bh(&dest->dst_lock);
	dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
	if (dest_dst && dest_dst->dst_cache->dev == dev) {
		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
			      dev->name,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      refcount_read(&dest->refcnt));
		__ip_vs_dst_cache_reset(dest);
	}
	spin_unlock_bh(&dest->dst_lock);

}
/* Netdev event receiver
 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
 */
static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	unsigned int idx;

	if (event != NETDEV_DOWN || !ipvs)
		return NOTIFY_DONE;
	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
	EnterFunction(2);
	mutex_lock(&__ip_vs_mutex);
	/* Walk both service hash tables and drop cached dsts on this dev */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (svc->ipvs == ipvs) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					ip_vs_forget_dev(dest, dev);
				}
			}
		}

		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (svc->ipvs == ipvs) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					ip_vs_forget_dev(dest, dev);
				}
			}

		}
	}

	/* Trashed destinations may also hold cached dsts on this dev */
	spin_lock_bh(&ipvs->dest_trash_lock);
	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
		ip_vs_forget_dev(dest, dev);
	}
	spin_unlock_bh(&ipvs->dest_trash_lock);
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
	return NOTIFY_DONE;
}

/*
 *	Zero counters in a service or all services
 */
static int ip_vs_zero_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest;

	list_for_each_entry(dest, &svc->destinations, n_list) {
		ip_vs_zero_stats(&dest->stats);
	}
	ip_vs_zero_stats(&svc->stats);
	return 0;
}

/* Zero the counters of every service (and dest) in this netns */
static int ip_vs_zero_all(struct netns_ipvs *ipvs)
{
	int idx;
	struct ip_vs_service *svc;

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (svc->ipvs == ipvs)
				ip_vs_zero_service(svc);
		}
	}

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (svc->ipvs == ipvs)
				ip_vs_zero_service(svc);
		}
	}

	ip_vs_zero_stats(&ipvs->tot_stats);
	return 0;
}

#ifdef CONFIG_SYSCTL

/* upper bound for the sync_retries sysctl below */
static int three = 3;

/* Sysctl handler for drop_entry/drop_packet/secure_tcp: accept values
 * 0..3 only and re-evaluate the defense level on every accepted change.
 */
static int
proc_do_defense_mode(struct ctl_table *table, int write,
		     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct netns_ipvs *ipvs = table->extra2;
	int *valp = table->data;
	int val = *valp;
	int rc;

	/* parse into a local copy so an invalid write leaves *valp intact */
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = sizeof(int),
		.mode = table->mode,
	};

	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write && (*valp != val)) {
		if (val < 0 || val > 3) {
			rc = -EINVAL;
		} else {
			*valp = val;
			update_defense_level(ipvs);
		}
	}
	return rc;
}

/* Sysctl handler for the {sync_threshold, sync_period} int pair.
 * Rejects negative values and threshold >= period (unless period is 0).
 * NOTE(review): the memcpy to/from table->data is not serialized against
 * concurrent writers here; later kernels guard this with a mutex - confirm
 * before relying on atomicity of the pair.
 */
static int
proc_do_sync_threshold(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val[2];
	int rc;
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = table->maxlen,
		.mode = table->mode,
	};

	memcpy(val, valp, sizeof(val));
	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write) {
		if (val[0] < 0 || val[1] < 0 ||
		    (val[0] >= val[1] && val[1]))
			rc = -EINVAL;
		else
			memcpy(valp, val, sizeof(val));
	}
	return rc;
}

/* Sysctl handler for sync_ports: only powers of two >= 1 are accepted */
static int
proc_do_sync_ports(struct ctl_table *table, int write,
		   void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val = *valp;
	int rc;

	/* parse into a local copy so an invalid write leaves *valp intact */
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = sizeof(int),
		.mode = table->mode,
	};

	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write && (*valp != val)) {
		if (val < 1 || !is_power_of_2(val))
			rc = -EINVAL;
		else
			*valp = val;
	}
	return rc;
}

/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change order or insert new entries without
 *	align with netns init in ip_vs_control_net_init()
 */

static struct ctl_table vs_vars[] = {
	{
		.procname	= "amemthresh",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "am_droprate",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "drop_entry",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "drop_packet",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
#ifdef CONFIG_IP_VS_NFCT
	{
		.procname	= "conntrack",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.procname	= "secure_tcp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "snat_reroute",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.procname	= "sync_version",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "sync_ports",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_sync_ports,
	},
	{
		.procname	= "sync_persist_mode",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sync_qlen_max",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "sync_sock_size",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cache_bypass",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_nodest_conn",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sloppy_tcp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sloppy_sctp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_quiescent_template",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* two ints: {sync_threshold, sync_period} */
		.procname	= "sync_threshold",
		.maxlen		=
			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
		.mode		= 0644,
		.proc_handler	= proc_do_sync_threshold,
	},
	{
		.procname	= "sync_refresh_period",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "sync_retries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= &three,
	},
	{
		.procname	= "nat_icmp_send",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "pmtu_disc",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "backup_only",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "conn_reuse_mode",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "schedule_icmp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ignore_tunneled",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	{
		.procname	= "debug_level",
		.data		= &sysctl_ip_vs_debug_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{ }
};

#endif

#ifdef CONFIG_PROC_FS

/* Iterator state for the /proc/net/ip_vs seq_file walk */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct hlist_head *table;  /* which hash table we are walking */
	int bucket;                /* current bucket in that table */
};

/*
 * Write the contents of the VS rule table to a PROCfs file.
 * (It is kept just for backward compatibility)
 */
static inline const char *ip_vs_fwd_name(unsigned int flags)
{
	switch (flags & IP_VS_CONN_F_FWD_MASK) {
	case IP_VS_CONN_F_LOCALNODE:
		return "Local";
	case IP_VS_CONN_F_TUNNEL:
		return "Tunnel";
	case IP_VS_CONN_F_DROUTE:
		return "Route";
	default:
		return "Masq";
	}
}


/* Get the Nth entry in the two lists */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
			if ((svc->ipvs == ipvs) && pos-- == 0) {
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
					 f_list) {
			if ((svc->ipvs == ipvs) && pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	return NULL;
}

/* seq_file start: takes the RCU read lock held until ..._seq_stop() */
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}


static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct hlist_node *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		e = rcu_dereference(hlist_next_rcu(&svc->s_list));
		if (e)
			return hlist_entry(e, struct ip_vs_service, s_list);

		/* end of bucket: return the first entry of the next
		 * non-empty bucket (loop body returns immediately)
		 */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			hlist_for_each_entry_rcu(svc,
						 &ip_vs_svc_table[iter->bucket],
						 s_list) {
				return svc;
			}
		}

		/* protocol table exhausted: switch to the fwmark table */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	e = rcu_dereference(hlist_next_rcu(&svc->f_list));
	if (e)
		return hlist_entry(e, struct ip_vs_service, f_list);

scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		hlist_for_each_entry_rcu(svc,
					 &ip_vs_svc_fwm_table[iter->bucket],
					 f_list)
			return svc;
	}

	return NULL;
}

static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}


static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		/* table header, printed once per read */
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		struct net *net = seq_file_net(seq);
		struct netns_ipvs *ipvs = net_ipvs(net);
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;
		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
		char *sched_name = sched ? sched->name : "none";

		if (svc->ipvs != ipvs)
			return 0;
		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s [%pI6]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   ntohs(svc->port),
					   sched_name);
			else
#endif
				seq_printf(seq, "%s %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   sched_name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
			seq_printf(seq, "FWM %08X %s %s",
				   svc->fwmark, sched_name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}

		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				   svc->timeout,
				   ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   " -> [%pI6]:%04X"
					   " %-7s %-6d %-10d %-10d\n",
					   &dest->addr.in6,
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   " -> %08X:%04X "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));

		}
	}
	return 0;
}

static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};

/* /proc/net/ip_vs_stats: totals and per-second rates for this netns */
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);
	struct ip_vs_kstats show;

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Total Incoming Outgoing Incoming Outgoing\n");
	seq_puts(seq,
		 " Conns Packets Packets Bytes Bytes\n");

	ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
	seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
		   (unsigned long long)show.conns,
		   (unsigned long long)show.inpkts,
		   (unsigned long long)show.outpkts,
		   (unsigned long long)show.inbytes,
		   (unsigned long long)show.outbytes);

/*               01234567 01234567 01234567 0123456701234567 0123456701234567*/
	seq_puts(seq,
		 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
	seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
		   (unsigned long long)show.cps,
		   (unsigned long long)show.inpps,
		   (unsigned long long)show.outpps,
		   (unsigned long long)show.inbps,
		   (unsigned long long)show.outbps);

	return 0;
}

/* /proc/net/ip_vs_stats_percpu: per-CPU counters plus the same totals */
static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);
	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
	struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
	struct ip_vs_kstats kstats;
	int i;

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Total Incoming Outgoing Incoming Outgoing\n");
	seq_puts(seq,
		 "CPU Conns Packets Packets Bytes Bytes\n");

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
		unsigned int start;
		u64 conns, inpkts, outpkts, inbytes, outbytes;

		/* u64_stats seqcount loop: retry until a consistent
		 * snapshot of this CPU's counters is read
		 */
		do {
			start = u64_stats_fetch_begin_irq(&u->syncp);
			conns = u->cnt.conns;
			inpkts = u->cnt.inpkts;
			outpkts = u->cnt.outpkts;
			inbytes = u->cnt.inbytes;
			outbytes = u->cnt.outbytes;
		} while (u64_stats_fetch_retry_irq(&u->syncp, start));

		seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
			   i, (u64)conns, (u64)inpkts,
			   (u64)outpkts, (u64)inbytes,
			   (u64)outbytes);
	}

	ip_vs_copy_stats(&kstats, tot_stats);

	seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
		   (unsigned long long)kstats.conns,
		   (unsigned long long)kstats.inpkts,
		   (unsigned long long)kstats.outpkts,
		   (unsigned long long)kstats.inbytes,
		   (unsigned long long)kstats.outbytes);

/* ...              01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
	seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
		   kstats.cps,
		   kstats.inpps,
		   kstats.outpps,
		   kstats.inbps,
		   kstats.outbps);

	return 0;
}
#endif

/*
 *	Set timeout values for tcp tcpfin udp in the timeout_table.
 *	All values are in seconds from userspace; 0 means "leave unchanged".
 */
static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
		  u->tcp_timeout,
		  u->tcp_fin_timeout,
		  u->udp_timeout);

#ifdef CONFIG_IP_VS_PROTO_TCP
	/* reject values whose * HZ conversion would overflow an int */
	if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
	    u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
		return -EINVAL;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
		return -EINVAL;
#endif

#ifdef CONFIG_IP_VS_PROTO_TCP
	if (u->tcp_timeout) {
		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
			= u->tcp_timeout * HZ;
	}

	if (u->tcp_fin_timeout) {
		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
			= u->tcp_fin_timeout * HZ;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout) {
		pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
		pd->timeout_table[IP_VS_UDP_S_NORMAL]
			= u->udp_timeout * HZ;
	}
#endif
	return 0;
}

/* Map a set/get sockopt command to a 0-based table index */
#define CMDID(cmd)		(cmd - IP_VS_BASE_CTL)

/* service + dest pair, the layout the *DEST sockopt commands pass in */
struct ip_vs_svcdest_user {
	struct ip_vs_service_user	s;
	struct ip_vs_dest_user		d;
};

/* exact argument length expected for each set command */
static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
	[CMDID(IP_VS_SO_SET_ADD)]         = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_EDIT)]        = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_DEL)]         = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_ADDDEST)]     = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_DELDEST)]     = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_EDITDEST)]    = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_TIMEOUT)]     = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_STOPDAEMON)]  = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_ZERO)]        = sizeof(struct ip_vs_service_user),
};

/* union over all set-command payloads, used only to size the arg buffer */
union ip_vs_set_arglen {
	struct ip_vs_service_user	field_IP_VS_SO_SET_ADD;
	struct ip_vs_service_user	field_IP_VS_SO_SET_EDIT;
	struct ip_vs_service_user	field_IP_VS_SO_SET_DEL;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_ADDDEST;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_DELDEST;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_EDITDEST;
	struct ip_vs_timeout_user	field_IP_VS_SO_SET_TIMEOUT;
	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STARTDAEMON;
	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STOPDAEMON;
	struct ip_vs_service_user	field_IP_VS_SO_SET_ZERO;
};

#define MAX_SET_ARGLEN	sizeof(union ip_vs_set_arglen)

static void
ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
		       struct ip_vs_service_user *usvc_compat)
{
	/* Convert the legacy (IPv4-only) sockopt service layout to the
	 * extended kernel-internal representation. */
	memset(usvc, 0, sizeof(*usvc));

	/* old sockopt interface carries no address family: always IPv4 */
	usvc->af = AF_INET;
	usvc->protocol = usvc_compat->protocol;
	usvc->addr.ip = usvc_compat->addr;
	usvc->port = usvc_compat->port;
	usvc->fwmark = usvc_compat->fwmark;

	/* Deep copy of sched_name is not needed here */
	usvc->sched_name = usvc_compat->sched_name;

	usvc->flags = usvc_compat->flags;
	usvc->timeout = usvc_compat->timeout;
	usvc->netmask = usvc_compat->netmask;
}

/* Convert the legacy sockopt destination layout to the internal one.
 * The old interface is IPv4-only and carries no tunnel type, so IPIP
 * is implied. */
static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
				    struct ip_vs_dest_user *udest_compat)
{
	memset(udest, 0, sizeof(*udest));

	udest->addr.ip = udest_compat->addr;
	udest->port = udest_compat->port;
	udest->conn_flags = udest_compat->conn_flags;
	udest->weight = udest_compat->weight;
	udest->u_threshold = udest_compat->u_threshold;
	udest->l_threshold = udest_compat->l_threshold;
	udest->af = AF_INET;
	udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
}

/*
 * SET sockopt handler: checks CAP_NET_ADMIN, validates the command
 * range and that the argument length matches set_arglen[] exactly,
 * then copies the argument onto the stack before dispatching.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
{
	struct net *net = sock_net(sk);
	int ret;
	unsigned char arg[MAX_SET_ARGLEN];
	struct ip_vs_service_user *usvc_compat;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest_compat;
	struct ip_vs_dest_user_kern udest;
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* set_arglen entries are unsigned char, so the buffer must fit */
	BUILD_BUG_ON(sizeof(arg) > 255);
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
		return -EINVAL;
	/* the argument length must match the command exactly */
	if (len != set_arglen[CMDID(cmd)]) {
		IP_VS_DBG(1, "set_ctl: len %u != %u\n",
			  len, set_arglen[CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_sockptr(arg, ptr, len) != 0)
		return -EFAULT;

	/* Handle daemons since they have another lock (sync_mutex,
	 * taken inside start/stop_sync_thread, not __ip_vs_mutex) */
	if (cmd == IP_VS_SO_SET_STARTDAEMON ||
	    cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;

		if (cmd == IP_VS_SO_SET_STARTDAEMON) {
			struct ipvs_sync_daemon_cfg cfg;

			memset(&cfg, 0, sizeof(cfg));
			ret = -EINVAL;
			/* reject an empty or unterminated interface name */
			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
				    sizeof(cfg.mcast_ifn)) <= 0)
				return ret;
			cfg.syncid = dm->syncid;
			ret = start_sync_thread(ipvs, &cfg, dm->state);
		} else {
			ret = stop_sync_thread(ipvs, dm->state);
		}
		return ret;
	}

	/* all remaining commands mutate service state: serialize them */
	mutex_lock(&__ip_vs_mutex);
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush(ipvs, false);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	}

	usvc_compat = (struct ip_vs_service_user *)arg;
	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);

	/* We only use the new structs internally, so copy userspace compat
	 * structs to extended internal versions */
	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
	ip_vs_copy_udest_compat(&udest, udest_compat);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
			ret = ip_vs_zero_all(ipvs);
			goto out_unlock;
		}
	}

	/* sched_name must be NUL-terminated within the maximum length */
	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
	    IP_VS_SCHEDNAME_MAXLEN) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
	    usvc.protocol != IPPROTO_SCTP) {
		pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
		       usvc.protocol, &usvc.addr.ip,
		       ntohs(usvc.port));
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	rcu_read_lock();
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
					   &usvc.addr, usvc.port);
	else
		svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
	rcu_read_unlock();

	/* every command except ADD requires an existing, matching service */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc.protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(ipvs, &usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	default:
		ret = -EINVAL;
	}

out_unlock:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}


/* Fill a userspace ip_vs_service_entry from a kernel ip_vs_service.
 * Called with __ip_vs_mutex held by do_ip_vs_get_ctl(), hence the
 * rcu_dereference_protected(..., 1) on the scheduler pointer. */
static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
	struct ip_vs_scheduler *sched;
	struct ip_vs_kstats kstats;
	char *sched_name;

	sched = rcu_dereference_protected(src->scheduler, 1);
	sched_name = sched ?
sched->name : "none";
	dst->protocol = src->protocol;
	dst->addr = src->addr.ip;
	dst->port = src->port;
	dst->fwmark = src->fwmark;
	strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
	dst->flags = src->flags;
	/* export the timeout in seconds, not jiffies */
	dst->timeout = src->timeout / HZ;
	dst->netmask = src->netmask;
	dst->num_dests = src->num_dests;
	ip_vs_copy_stats(&kstats, &src->stats);
	ip_vs_export_stats_user(&dst->stats, &kstats);
}

/* Copy up to get->num_services entries from both service hash tables
 * (address-keyed and fwmark-keyed) into the user buffer.  Runs under
 * __ip_vs_mutex, taken by do_ip_vs_get_ctl(). */
static inline int
__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
			    const struct ip_vs_get_services *get,
			    struct ip_vs_get_services __user *uptr)
{
	int idx, count=0;
	struct ip_vs_service *svc;
	struct ip_vs_service_entry entry;
	int ret = 0;

	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || (svc->ipvs != ipvs))
				continue;

			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}

	/* second pass: fwmark-keyed services */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET || (svc->ipvs != ipvs))
				continue;

			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}
out:
	return ret;
}

/* Copy the destinations of one service (looked up by fwmark or by
 * <protocol, addr, port>) to the user buffer.  -ESRCH if the service
 * does not exist. */
static inline int
__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
			 struct ip_vs_get_dests __user *uptr)
{
	struct ip_vs_service *svc;
	union nf_inet_addr addr = { .ip = get->addr };
	int ret = 0;

	rcu_read_lock();
	if (get->fwmark)
		svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
	else
		svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
					   get->port);
	rcu_read_unlock();

	if (svc) {
		int count = 0;
		struct ip_vs_dest *dest;
		struct ip_vs_dest_entry entry;
		struct ip_vs_kstats kstats;

		memset(&entry, 0, sizeof(entry));
		list_for_each_entry(dest, &svc->destinations, n_list) {
			if (count >= get->num_dests)
				break;

			/* Cannot expose heterogeneous members via sockopt
			 * interface
			 */
			if (dest->af != svc->af)
				continue;

			entry.addr = dest->addr.ip;
			entry.port = dest->port;
			entry.conn_flags = atomic_read(&dest->conn_flags);
			entry.weight = atomic_read(&dest->weight);
			entry.u_threshold = dest->u_threshold;
			entry.l_threshold = dest->l_threshold;
			entry.activeconns = atomic_read(&dest->activeconns);
			entry.inactconns = atomic_read(&dest->inactconns);
			entry.persistconns = atomic_read(&dest->persistconns);
			ip_vs_copy_stats(&kstats, &dest->stats);
			ip_vs_export_stats_user(&entry.stats, &kstats);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				break;
			}
			count++;
		}
	} else
		ret = -ESRCH;
	return ret;
}

/* Report the current tcp/tcpfin/udp timeouts, converted back from
 * jiffies to seconds; fields for protocols compiled out stay zero. */
static inline void
__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

	memset(u, 0, sizeof (*u));

#ifdef CONFIG_IP_VS_PROTO_TCP
	pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
/ HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
	pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
	u->udp_timeout =
		pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}

/* Minimum argument length required by each GET command; the user
 * buffer may be larger for the variable-sized SERVICES/DESTS replies. */
static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
	[CMDID(IP_VS_SO_GET_VERSION)]	= 64,
	[CMDID(IP_VS_SO_GET_INFO)]	= sizeof(struct ip_vs_getinfo),
	[CMDID(IP_VS_SO_GET_SERVICES)]	= sizeof(struct ip_vs_get_services),
	[CMDID(IP_VS_SO_GET_SERVICE)]	= sizeof(struct ip_vs_service_entry),
	[CMDID(IP_VS_SO_GET_DESTS)]	= sizeof(struct ip_vs_get_dests),
	[CMDID(IP_VS_SO_GET_TIMEOUT)]	= sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_GET_DAEMON)]	= 2 * sizeof(struct ip_vs_daemon_user),
};

/* Union sized to hold the largest GET argument header; used only to
 * size the on-stack copy buffer in do_ip_vs_get_ctl(). */
union ip_vs_get_arglen {
	char				field_IP_VS_SO_GET_VERSION[64];
	struct ip_vs_getinfo		field_IP_VS_SO_GET_INFO;
	struct ip_vs_get_services	field_IP_VS_SO_GET_SERVICES;
	struct ip_vs_service_entry	field_IP_VS_SO_GET_SERVICE;
	struct ip_vs_get_dests		field_IP_VS_SO_GET_DESTS;
	struct ip_vs_timeout_user	field_IP_VS_SO_GET_TIMEOUT;
	struct ip_vs_daemon_user	field_IP_VS_SO_GET_DAEMON[2];
};

#define MAX_GET_ARGLEN	sizeof(union ip_vs_get_arglen)

/*
 * GET sockopt handler: checks CAP_NET_ADMIN and the command range,
 * copies the (fixed-size) request header to the stack, then answers
 * under __ip_vs_mutex (or sync_mutex for the daemon query).
 */
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	unsigned char arg[MAX_GET_ARGLEN];
	int ret = 0;
	unsigned int copylen;
	struct net *net = sock_net(sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	BUG_ON(!net);
	/* get_arglen entries are unsigned char, so the buffer must fit */
	BUILD_BUG_ON(sizeof(arg) > 255);
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
		return -EINVAL;

	copylen = get_arglen[CMDID(cmd)];
	if (*len < (int) copylen) {
		IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, copylen) != 0)
		return -EFAULT;
	/*
	 * Handle daemons first since it has its own locking
	 */
	if (cmd == IP_VS_SO_GET_DAEMON) {
		struct ip_vs_daemon_user d[2];

		/* d[0] reports the master daemon, d[1] the backup one */
		memset(&d, 0, sizeof(d));
		mutex_lock(&ipvs->sync_mutex);
		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
			d[0].state = IP_VS_STATE_MASTER;
			strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
				sizeof(d[0].mcast_ifn));
			d[0].syncid = ipvs->mcfg.syncid;
		}
		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
			d[1].state = IP_VS_STATE_BACKUP;
			strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
				sizeof(d[1].mcast_ifn));
			d[1].syncid = ipvs->bcfg.syncid;
		}
		if (copy_to_user(user, &d, sizeof(d)) != 0)
			ret = -EFAULT;
		mutex_unlock(&ipvs->sync_mutex);
		return ret;
	}

	mutex_lock(&__ip_vs_mutex);
	switch (cmd) {
	case IP_VS_SO_GET_VERSION:
	{
		/* 64 matches the get_arglen[] entry for this command */
		char buf[64];

		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
			ret = -EFAULT;
			goto out;
		}
		*len = strlen(buf)+1;
	}
	break;

	case IP_VS_SO_GET_INFO:
	{
		struct ip_vs_getinfo info;
		info.version = IP_VS_VERSION_CODE;
		info.size = ip_vs_conn_tab_size;
		info.num_services = ipvs->num_services;
		if (copy_to_user(user, &info, sizeof(info)) != 0)
			ret = -EFAULT;
	}
	break;

	case IP_VS_SO_GET_SERVICES:
	{
		struct ip_vs_get_services *get;
		int size;

		get = (struct ip_vs_get_services *)arg;
		/* user buffer must be exactly header + num_services entries */
		size = struct_size(get, entrytable, get->num_services);
		if (*len != size) {
			pr_err("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_service_entries(ipvs, get, user);
	}
	break;

	case IP_VS_SO_GET_SERVICE:
	{
		struct ip_vs_service_entry *entry;
		struct ip_vs_service *svc;
		union nf_inet_addr addr;

		entry = (struct ip_vs_service_entry *)arg;
		addr.ip = entry->addr;
		rcu_read_lock();
		if (entry->fwmark)
			svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
		else
			svc = __ip_vs_service_find(ipvs, AF_INET,
						   entry->protocol, &addr,
						   entry->port);
		rcu_read_unlock();
		if (svc) {
			/* reuse the request buffer as the reply */
			ip_vs_copy_service(entry, svc);
			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
				ret = -EFAULT;
		} else
			ret = -ESRCH;
	}
	break;

	case IP_VS_SO_GET_DESTS:
	{
		struct ip_vs_get_dests *get;
		int size;

		get = (struct ip_vs_get_dests *)arg;
		/* user buffer must be exactly header + num_dests entries */
		size = struct_size(get, entrytable, get->num_dests);
		if (*len != size) {
			pr_err("length: %u != %u\n", *len, size);
			ret = -EINVAL;
			goto out;
		}
		ret = __ip_vs_get_dest_entries(ipvs, get, user);
	}
	break;

	case IP_VS_SO_GET_TIMEOUT:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(ipvs, &t);
		if (copy_to_user(user, &t, sizeof(t)) != 0)
			ret = -EFAULT;
	}
	break;

	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}


/* Registration record for the legacy [gs]etsockopt interface */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};

/*
 * Generic Netlink interface
 */

/* IPVS genetlink family */
static struct genl_family ip_vs_genl_family;

/* Policy used for first-level command attributes */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN - 1 },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_SYNC_MAXLEN]	= { .type = NLA_U16 },
	[IPVS_DAEMON_ATTR_MCAST_GROUP]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_GROUP6]	= { .len = sizeof(struct in6_addr) },
	[IPVS_DAEMON_ATTR_MCAST_PORT]	= { .type = NLA_U16 },
	[IPVS_DAEMON_ATTR_MCAST_TTL]	= { .type = NLA_U8 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IP_VS_PENAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
	[IPVS_DEST_ATTR_ADDR_FAMILY]	= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_TUN_TYPE]	= { .type = NLA_U8 },
	[IPVS_DEST_ATTR_TUN_PORT]	= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_TUN_FLAGS]	= { .type = NLA_U16 },
};

/* Emit the legacy stats nest: byte counters as u64, everything else
 * truncated to u32 for backwards compatibility. */
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
				 struct ip_vs_kstats *kstats)
{
	struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);

	if (!nl_stats)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
		goto nla_put_failure;
	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	/* roll back the partially-built nest */
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}

/* Emit the full-width stats nest: every counter as a padded u64 */
static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
				   struct ip_vs_kstats *kstats)
{
	struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);

	if (!nl_stats)
		return -EMSGSIZE;

	if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps,
			      IPVS_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps,
			      IPVS_STATS_ATTR_PAD))
		goto nla_put_failure;
	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}

/* Serialize one service into an IPVS_CMD_ATTR_SERVICE nest */
static int ip_vs_genl_fill_service(struct sk_buff *skb,
				   struct ip_vs_service *svc)
{
	struct ip_vs_scheduler *sched;
	struct ip_vs_pe *pe;
	struct nlattr *nl_service;
	struct ip_vs_flags flags = { .flags = svc->flags,
				     .mask = ~0 };
	struct ip_vs_kstats kstats;
	char *sched_name;

	nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE);
	if (!nl_service)
		return -EMSGSIZE;

	if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
		goto nla_put_failure;
	/* a service is identified either by fwmark or by proto/addr/port */
	if (svc->fwmark) {
		if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
			goto nla_put_failure;
	} else {
		if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
		    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
		    nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
			goto nla_put_failure;
	}

	/* called under __ip_vs_mutex by the dump path, hence "protected" */
	sched = rcu_dereference_protected(svc->scheduler, 1);
	sched_name = sched ? sched->name : "none";
	pe = rcu_dereference_protected(svc->pe, 1);
	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
		goto nla_put_failure;
	ip_vs_copy_stats(&kstats, &svc->stats);
	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
		goto nla_put_failure;
	if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_service);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_service);
	return -EMSGSIZE;
}

/* Emit one NLM_F_MULTI message carrying a single service */
static int ip_vs_genl_dump_service(struct sk_buff *skb,
				   struct ip_vs_service *svc,
				   struct netlink_callback *cb)
{
	void *hdr;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family, NLM_F_MULTI,
			  IPVS_CMD_NEW_SERVICE);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_service(skb, svc) < 0)
		goto nla_put_failure;

	genlmsg_end(skb, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

/* Netlink dump callback: walk both service tables, resuming from
 * cb->args[0] on each invocation. */
static int ip_vs_genl_dump_services(struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	int idx = 0, i;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_mutex);
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
			if (++idx <= start || (svc->ipvs != ipvs))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				/* skb full: back off so we retry this entry */
				idx--;
				goto nla_put_failure;
			}
		}
	}

	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
			if (++idx <= start || (svc->ipvs != ipvs))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				idx--;
				goto nla_put_failure;
			}
		}
	}

nla_put_failure:
	mutex_unlock(&__ip_vs_mutex);
	cb->args[0] = idx;

	return skb->len;
}

/* Accept AF_INET always; AF_INET6 only when IPv6 support is built and
 * the ipv6 module is loaded. */
static bool ip_vs_is_af_valid(int af)
{
	if (af == AF_INET)
		return true;
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6 && ipv6_mod_enabled())
		return true;
#endif
	return false;
}

/* Parse an IPVS_CMD_ATTR_SERVICE nest into *usvc and look up the
 * matching service (*ret_svc, may be NULL).  With full_entry, the
 * additional configuration attributes are required as well. */
static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
				    struct ip_vs_service_user_kern *usvc,
				    struct nlattr *nla, bool full_entry,
				    struct ip_vs_service **ret_svc)
{
	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
	struct ip_vs_service *svc;

	/* Parse mandatory identifying service fields first */
	if (nla == NULL ||
	    nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL))
		return -EINVAL;

	nla_af		= attrs[IPVS_SVC_ATTR_AF];
	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];

	/* need the family plus either a fwmark or a full triplet */
	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
		return -EINVAL;

	memset(usvc, 0, sizeof(*usvc));

	usvc->af = nla_get_u16(nla_af);
	if (!ip_vs_is_af_valid(usvc->af))
		return
-EAFNOSUPPORT;

	if (nla_fwmark) {
		/* fwmark services store a placeholder protocol */
		usvc->protocol = IPPROTO_TCP;
		usvc->fwmark = nla_get_u32(nla_fwmark);
	} else {
		usvc->protocol = nla_get_u16(nla_protocol);
		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
		usvc->port = nla_get_be16(nla_port);
		usvc->fwmark = 0;
	}

	rcu_read_lock();
	if (usvc->fwmark)
		svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
	else
		svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
					   &usvc->addr, usvc->port);
	rcu_read_unlock();
	*ret_svc = svc;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
			      *nla_netmask;
		struct ip_vs_flags flags;

		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
		nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];

		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
			return -EINVAL;

		nla_memcpy(&flags, nla_flags, sizeof(flags));

		/* prefill flags from service if it already exists */
		if (svc)
			usvc->flags = svc->flags;

		/* set new flags from userland */
		usvc->flags = (usvc->flags & ~flags.mask) |
			      (flags.flags & flags.mask);
		usvc->sched_name = nla_data(nla_sched);
		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
		usvc->timeout = nla_get_u32(nla_timeout);
		usvc->netmask = nla_get_be32(nla_netmask);
	}

	return 0;
}

/* Resolve an IPVS_CMD_ATTR_SERVICE nest to a service pointer; returns
 * ERR_PTR on parse failure, NULL when no such service exists. */
static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
						     struct nlattr *nla)
{
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	int ret;

	ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
	return ret ? ERR_PTR(ret) : svc;
}

/* Serialize one destination into an IPVS_CMD_ATTR_DEST nest */
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
	struct nlattr *nl_dest;
	struct ip_vs_kstats kstats;

	nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST);
	if (!nl_dest)
		return -EMSGSIZE;

	if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
	    nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
			(atomic_read(&dest->conn_flags) &
			 IP_VS_CONN_F_FWD_MASK)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
			atomic_read(&dest->weight)) ||
	    nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
		       dest->tun_type) ||
	    nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
			 dest->tun_port) ||
	    nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
			dest->tun_flags) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
			atomic_read(&dest->activeconns)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
			atomic_read(&dest->inactconns)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
			atomic_read(&dest->persistconns)) ||
	    nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
		goto nla_put_failure;
	ip_vs_copy_stats(&kstats, &dest->stats);
	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
		goto nla_put_failure;
	if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_dest);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_dest);
	return -EMSGSIZE;
}

/* Emit one NLM_F_MULTI message carrying a single destination */
static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
				struct netlink_callback *cb)
{
	void *hdr;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family,
			  NLM_F_MULTI,
			  IPVS_CMD_NEW_DEST);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_dest(skb, dest) < 0)
		goto nla_put_failure;

	genlmsg_end(skb, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

/* Netlink dump callback: list the destinations of the service named
 * in the request, resuming from cb->args[0]. */
static int ip_vs_genl_dump_dests(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	int idx = 0;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_mutex);

	/* Try to find the service for which to dump destinations */
	if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack))
		goto out_err;


	svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
	if (IS_ERR_OR_NULL(svc))
		goto out_err;

	/* Dump the destinations */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if (++idx <= start)
			continue;
		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
			/* skb full: back off so we retry this entry */
			idx--;
			goto nla_put_failure;
		}
	}

nla_put_failure:
	cb->args[0] = idx;

out_err:
	mutex_unlock(&__ip_vs_mutex);

	return skb->len;
}

/* Parse an IPVS_CMD_ATTR_DEST nest into *udest.  With full_entry the
 * forwarding method, weight and thresholds are mandatory; tunnel
 * attributes stay optional. */
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
				 struct nlattr *nla, bool full_entry)
{
	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
	struct nlattr *nla_addr, *nla_port;
	struct nlattr *nla_addr_family;

	/* Parse mandatory identifying destination fields first */
	if (nla == NULL ||
	    nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL))
		return -EINVAL;

	nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
	nla_port = attrs[IPVS_DEST_ATTR_PORT];
	nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];

	if (!(nla_addr && nla_port))
		return -EINVAL;

	memset(udest, 0, sizeof(*udest));

	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
	udest->port = nla_get_be16(nla_port);

	/* af == 0 means "inherit from the service" downstream */
	if (nla_addr_family)
		udest->af = nla_get_u16(nla_addr_family);
	else
		udest->af = 0;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
			      *nla_l_thresh, *nla_tun_type, *nla_tun_port,
			      *nla_tun_flags;

		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
		nla_tun_type	= attrs[IPVS_DEST_ATTR_TUN_TYPE];
		nla_tun_port	= attrs[IPVS_DEST_ATTR_TUN_PORT];
		nla_tun_flags	= attrs[IPVS_DEST_ATTR_TUN_FLAGS];

		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
			return -EINVAL;

		udest->conn_flags = nla_get_u32(nla_fwd)
				    & IP_VS_CONN_F_FWD_MASK;
		udest->weight = nla_get_u32(nla_weight);
		udest->u_threshold = nla_get_u32(nla_u_thresh);
		udest->l_threshold = nla_get_u32(nla_l_thresh);

		if (nla_tun_type)
			udest->tun_type = nla_get_u8(nla_tun_type);

		if (nla_tun_port)
			udest->tun_port = nla_get_be16(nla_tun_port);

		if (nla_tun_flags)
			udest->tun_flags = nla_get_u16(nla_tun_flags);
	}

	return 0;
}

/* Serialize one sync-daemon configuration into an
 * IPVS_CMD_ATTR_DAEMON nest. */
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
				  struct ipvs_sync_daemon_cfg *c)
{
	struct nlattr *nl_daemon;

	nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON);
	if (!nl_daemon)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID,
c->syncid) || 3475 nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) || 3476 nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) || 3477 nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl)) 3478 goto nla_put_failure; 3479 #ifdef CONFIG_IP_VS_IPV6 3480 if (c->mcast_af == AF_INET6) { 3481 if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6, 3482 &c->mcast_group.in6)) 3483 goto nla_put_failure; 3484 } else 3485 #endif 3486 if (c->mcast_af == AF_INET && 3487 nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP, 3488 c->mcast_group.ip)) 3489 goto nla_put_failure; 3490 nla_nest_end(skb, nl_daemon); 3491 3492 return 0; 3493 3494 nla_put_failure: 3495 nla_nest_cancel(skb, nl_daemon); 3496 return -EMSGSIZE; 3497 } 3498 3499 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, 3500 struct ipvs_sync_daemon_cfg *c, 3501 struct netlink_callback *cb) 3502 { 3503 void *hdr; 3504 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3505 &ip_vs_genl_family, NLM_F_MULTI, 3506 IPVS_CMD_NEW_DAEMON); 3507 if (!hdr) 3508 return -EMSGSIZE; 3509 3510 if (ip_vs_genl_fill_daemon(skb, state, c)) 3511 goto nla_put_failure; 3512 3513 genlmsg_end(skb, hdr); 3514 return 0; 3515 3516 nla_put_failure: 3517 genlmsg_cancel(skb, hdr); 3518 return -EMSGSIZE; 3519 } 3520 3521 static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3522 struct netlink_callback *cb) 3523 { 3524 struct net *net = sock_net(skb->sk); 3525 struct netns_ipvs *ipvs = net_ipvs(net); 3526 3527 mutex_lock(&ipvs->sync_mutex); 3528 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3529 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3530 &ipvs->mcfg, cb) < 0) 3531 goto nla_put_failure; 3532 3533 cb->args[0] = 1; 3534 } 3535 3536 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3537 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3538 &ipvs->bcfg, cb) < 0) 3539 goto nla_put_failure; 3540 3541 cb->args[1] = 1; 3542 } 3543 3544 
	/* Falls through here on -EMSGSIZE too: a dump simply returns what
	 * fits and is resumed by the netlink core.
	 */
nla_put_failure:
	mutex_unlock(&ipvs->sync_mutex);

	return skb->len;
}

/* IPVS_CMD_NEW_DAEMON: parse the daemon attributes into an
 * ipvs_sync_daemon_cfg and start the requested sync thread.
 */
static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
	struct ipvs_sync_daemon_cfg c;
	struct nlattr *a;
	int ret;

	memset(&c, 0, sizeof(c));
	/* state, multicast interface and syncid are mandatory */
	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
		return -EINVAL;
	/* NOTE(review): strlcpy() is deprecated in the kernel; strscpy()
	 * is the suggested replacement (return value is unused here, so
	 * the semantics for this caller are the same) — TODO confirm the
	 * tree-wide convention before switching.
	 */
	strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
		sizeof(c.mcast_ifn));
	c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);

	a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
	if (a)
		c.sync_maxlen = nla_get_u16(a);

	/* Optional multicast group: IPv4 takes precedence when both
	 * attributes are present; the address must be multicast.
	 */
	a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
	if (a) {
		c.mcast_af = AF_INET;
		c.mcast_group.ip = nla_get_in_addr(a);
		if (!ipv4_is_multicast(c.mcast_group.ip))
			return -EINVAL;
	} else {
		a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
		if (a) {
#ifdef CONFIG_IP_VS_IPV6
			int addr_type;

			c.mcast_af = AF_INET6;
			c.mcast_group.in6 = nla_get_in6_addr(a);
			addr_type = ipv6_addr_type(&c.mcast_group.in6);
			if (!(addr_type & IPV6_ADDR_MULTICAST))
				return -EINVAL;
#else
			return -EAFNOSUPPORT;
#endif
		}
	}

	a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
	if (a)
		c.mcast_port = nla_get_u16(a);

	a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
	if (a)
		c.mcast_ttl = nla_get_u8(a);

	/* The synchronization protocol is incompatible with mixed family
	 * services
	 */
	if (ipvs->mixed_address_family_dests > 0)
		return -EINVAL;

	ret = start_sync_thread(ipvs, &c,
				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
	return ret;
}

/* IPVS_CMD_DEL_DAEMON: stop the sync thread selected by the state attr. */
static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
	int ret;

	if (!attrs[IPVS_DAEMON_ATTR_STATE])
		return -EINVAL;

	ret = stop_sync_thread(ipvs,
			       nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
	return ret;
}

/* IPVS_CMD_SET_CONFIG: update the global protocol timeouts.  Attributes
 * that are absent keep their current values (the struct is pre-filled
 * from the live configuration).
 */
static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
	struct ip_vs_timeout_user t;

	__ip_vs_get_timeouts(ipvs, &t);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
		t.tcp_fin_timeout =
			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);

	return ip_vs_set_timeout(ipvs, &t);
}

/* doit handler for IPVS_CMD_NEW_DAEMON / IPVS_CMD_DEL_DAEMON: re-parse
 * the nested daemon attributes and dispatch to the new/del helper.
 */
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
	int ret = -EINVAL, cmd;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];

		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
		    nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack))
			goto out;

		if (cmd == IPVS_CMD_NEW_DAEMON)
			ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
		else
			ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
	}

out:
	return ret;
}

/* doit handler shared by all mutating commands (service/dest add, edit,
 * delete, zero, flush, set-config).  The whole operation runs under
 * __ip_vs_mutex, matching the sockopt interface.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
	bool need_full_svc = false, need_full_dest = false;
	struct ip_vs_service *svc = NULL;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_dest_user_kern udest;
	int ret = 0, cmd;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	mutex_lock(&__ip_vs_mutex);

	/* Commands that do not take a service argument are handled first */
	if (cmd == IPVS_CMD_FLUSH) {
		ret = ip_vs_flush(ipvs, false);
		goto out;
	} else if (cmd == IPVS_CMD_SET_CONFIG) {
		ret = ip_vs_genl_set_config(ipvs, info->attrs);
		goto out;
	} else if (cmd == IPVS_CMD_ZERO &&
		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		ret = ip_vs_zero_all(ipvs);
		goto out;
	}

	/* All following commands require a service argument, so check if we
	 * received a valid one. We need a full service specification when
	 * adding / editing a service. Only identifying members otherwise. */
	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
		need_full_svc = true;

	ret = ip_vs_genl_parse_service(ipvs, &usvc,
				       info->attrs[IPVS_CMD_ATTR_SERVICE],
				       need_full_svc, &svc);
	if (ret)
		goto out;

	/* Unless we're adding a new service, the service must already exist */
	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
		ret = -ESRCH;
		goto out;
	}

	/* Destination commands require a valid destination argument. For
	 * adding / editing a destination, we need a full destination
	 * specification. */
	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
	    cmd == IPVS_CMD_DEL_DEST) {
		if (cmd != IPVS_CMD_DEL_DEST)
			need_full_dest = true;

		ret = ip_vs_genl_parse_dest(&udest,
					    info->attrs[IPVS_CMD_ATTR_DEST],
					    need_full_dest);
		if (ret)
			goto out;

		/* Old protocols did not allow the user to specify address
		 * family, so we set it to zero instead.  We also didn't
		 * allow heterogeneous pools in the old code, so it's safe
		 * to assume that this will have the same address family as
		 * the service.
		 */
		if (udest.af == 0)
			udest.af = svc->af;

		if (!ip_vs_is_af_valid(udest.af)) {
			ret = -EAFNOSUPPORT;
			goto out;
		}

		if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
			/* The synchronization protocol is incompatible
			 * with mixed family services
			 */
			if (ipvs->sync_state) {
				ret = -EINVAL;
				goto out;
			}

			/* Which connection types do we support? */
			switch (udest.conn_flags) {
			case IP_VS_CONN_F_TUNNEL:
				/* We are able to forward this */
				break;
			default:
				ret = -EINVAL;
				goto out;
			}
		}
	}

	switch (cmd) {
	case IPVS_CMD_NEW_SERVICE:
		if (svc == NULL)
			ret = ip_vs_add_service(ipvs, &usvc, &svc);
		else
			ret = -EEXIST;
		break;
	case IPVS_CMD_SET_SERVICE:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IPVS_CMD_DEL_SERVICE:
		ret = ip_vs_del_service(svc);
		/* do not use svc, it can be freed */
		break;
	case IPVS_CMD_NEW_DEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IPVS_CMD_SET_DEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IPVS_CMD_DEL_DEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	case IPVS_CMD_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}

/* doit handler for the non-dump GET commands: build a single reply
 * message for GET_SERVICE, GET_CONFIG or GET_INFO.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *reply;
	int ret, cmd, reply_cmd;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	/* Each GET command is answered with the matching SET/NEW command
	 * number so userspace can reuse its parsing code.
	 */
	if (cmd == IPVS_CMD_GET_SERVICE)
		reply_cmd = IPVS_CMD_NEW_SERVICE;
	else if (cmd == IPVS_CMD_GET_INFO)
		reply_cmd = IPVS_CMD_SET_INFO;
	else if (cmd == IPVS_CMD_GET_CONFIG)
		reply_cmd = IPVS_CMD_SET_CONFIG;
	else {
		pr_err("unknown Generic Netlink command\n");
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	mutex_lock(&__ip_vs_mutex);

	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
	if (reply == NULL)
		goto nla_put_failure;

	switch (cmd) {
	case IPVS_CMD_GET_SERVICE:
	{
		struct ip_vs_service *svc;

		svc = ip_vs_genl_find_service(ipvs,
					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
		if (IS_ERR(svc)) {
			ret = PTR_ERR(svc);
			goto out_err;
		} else if (svc) {
			ret = ip_vs_genl_fill_service(msg, svc);
			if (ret)
				goto nla_put_failure;
		} else {
			ret = -ESRCH;
			goto out_err;
		}

		break;
	}

	case IPVS_CMD_GET_CONFIG:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(ipvs, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
				t.tcp_timeout) ||
		    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
				t.tcp_fin_timeout))
			goto nla_put_failure;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
			goto nla_put_failure;
#endif

		break;
	}

	case IPVS_CMD_GET_INFO:
		if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
				IP_VS_VERSION_CODE) ||
		    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
				ip_vs_conn_tab_size))
			goto nla_put_failure;
		break;
	}

	genlmsg_end(msg, reply);
	ret = genlmsg_reply(msg, info);
	goto out;

nla_put_failure:
	pr_err("not enough space in Netlink message\n");
	ret = -EMSGSIZE;

out_err:
	nlmsg_free(msg);
out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}


/* Generic Netlink operation table: every command requires CAP_NET_ADMIN
 * (GENL_ADMIN_PERM); strict attribute validation is disabled because the
 * policies predate it.
 */
static const struct genl_ops ip_vs_genl_ops[] = {
	{
		.cmd	  = IPVS_CMD_NEW_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
	{
		.cmd	  = IPVS_CMD_SET_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
	{
		.cmd	  = IPVS_CMD_DEL_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
	{
		.cmd	  = IPVS_CMD_GET_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_get_cmd,
		.dumpit	  = ip_vs_genl_dump_services,
	},
	{
		.cmd	  = IPVS_CMD_NEW_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
	{
		.cmd	  = IPVS_CMD_SET_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
	{
		.cmd	  = IPVS_CMD_DEL_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
	{
		/* GET_DEST is dump-only: destinations are always listed */
		.cmd	  = IPVS_CMD_GET_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.dumpit	  = ip_vs_genl_dump_dests,
	},
	{
		.cmd	  = IPVS_CMD_NEW_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_daemon,
	},
	{
		.cmd	  = IPVS_CMD_DEL_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_daemon,
	},
	{
		.cmd	  = IPVS_CMD_GET_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.dumpit	  = ip_vs_genl_dump_daemons,
	},
	{
		.cmd	  = IPVS_CMD_SET_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
	{
		.cmd	  = IPVS_CMD_GET_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_get_cmd,
	},
	{
		.cmd	  = IPVS_CMD_GET_INFO,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_get_cmd,
	},
	{
		.cmd	  = IPVS_CMD_ZERO,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
	{
		.cmd	  = IPVS_CMD_FLUSH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	  = GENL_ADMIN_PERM,
		.doit	  = ip_vs_genl_set_cmd,
	},
};

/* The IPVS Generic Netlink family; per-netns aware so ipvsadm works
 * inside network namespaces.
 */
static struct genl_family ip_vs_genl_family __ro_after_init = {
	.hdrsize	= 0,
	.name		= IPVS_GENL_NAME,
	.version	= IPVS_GENL_VERSION,
	.maxattr	= IPVS_CMD_ATTR_MAX,
	.policy		= ip_vs_cmd_policy,
	.netnsok	= true,		/* Make ipvsadm work on netns */
	.module		= THIS_MODULE,
	.ops		= ip_vs_genl_ops,
	.n_ops		= ARRAY_SIZE(ip_vs_genl_ops),
};

/* Register the IPVS Generic Netlink family with the netlink core. */
static int __init ip_vs_genl_register(void)
{
	return genl_register_family(&ip_vs_genl_family);
}

static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}

/* End of Generic Netlink interface definitions */

/*
 * per netns init/exit func.
 */
#ifdef CONFIG_SYSCTL
/* Set up the per-netns sysctl table under net/ipv4/vs, seed the tunable
 * defaults, start the stats estimator and schedule the defense work.
 * Returns 0 or -ENOMEM.
 */
static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;
	int idx;
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	/* Non-init netns get their own copy of the template table so the
	 * .data pointers below can target this netns' fields.
	 */
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	} else
		tbl = vs_vars;
	/* Initialize sysctl defaults */
	for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
		if (tbl[idx].proc_handler == proc_do_defense_mode)
			tbl[idx].extra2 = ipvs;
	}
	/* The tbl[idx++] assignments below MUST stay in the exact order of
	 * the entries in vs_vars; idx is advanced blindly.
	 */
	idx = 0;
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
#endif
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	ipvs->sysctl_sync_ports = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ports;
	tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
	ipvs->sysctl_sync_sock_size = 0;
	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
	tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	/* sync_threshold is a two-int array, so maxlen must be set too */
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
	tbl[idx++].data = &ipvs->sysctl_sync_retries;
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
	ipvs->sysctl_pmtu_disc = 1;
	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
	tbl[idx++].data = &ipvs->sysctl_backup_only;
	ipvs->sysctl_conn_reuse_mode = 1;
	tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
	tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
	tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;

	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
	if (ipvs->sysctl_hdr == NULL) {
		/* only free a table we allocated ourselves */
		if (!net_eq(net, &init_net))
			kfree(tbl);
		return -ENOMEM;
	}
	ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
	ipvs->sysctl_tbl = tbl;
	/* Schedule defense work */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	queue_delayed_work(system_long_wq, &ipvs->defense_work,
			   DEFENSE_TIMER_PERIOD);

	/* Init delayed work for expiring no dest conn */
	INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work,
			  expire_nodest_conn_handler);

	return 0;
}

/* Tear down everything ip_vs_control_net_init_sysctl() set up, in
 * reverse order: stop the work items, drop the sysctl table, stop the
 * estimator.
 */
static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;

	cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work);
	cancel_delayed_work_sync(&ipvs->defense_work);
	cancel_work_sync(&ipvs->defense_work.work);
	unregister_net_sysctl_table(ipvs->sysctl_hdr);
	ip_vs_stop_estimator(ipvs,
&ipvs->tot_stats); 4122 4123 if (!net_eq(net, &init_net)) 4124 kfree(ipvs->sysctl_tbl); 4125 } 4126 4127 #else 4128 4129 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; } 4130 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { } 4131 4132 #endif 4133 4134 static struct notifier_block ip_vs_dst_notifier = { 4135 .notifier_call = ip_vs_dst_event, 4136 #ifdef CONFIG_IP_VS_IPV6 4137 .priority = ADDRCONF_NOTIFY_PRIORITY + 5, 4138 #endif 4139 }; 4140 4141 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) 4142 { 4143 int i, idx; 4144 4145 /* Initialize rs_table */ 4146 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 4147 INIT_HLIST_HEAD(&ipvs->rs_table[idx]); 4148 4149 INIT_LIST_HEAD(&ipvs->dest_trash); 4150 spin_lock_init(&ipvs->dest_trash_lock); 4151 timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0); 4152 atomic_set(&ipvs->ftpsvc_counter, 0); 4153 atomic_set(&ipvs->nullsvc_counter, 0); 4154 atomic_set(&ipvs->conn_out_counter, 0); 4155 4156 /* procfs stats */ 4157 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 4158 if (!ipvs->tot_stats.cpustats) 4159 return -ENOMEM; 4160 4161 for_each_possible_cpu(i) { 4162 struct ip_vs_cpu_stats *ipvs_tot_stats; 4163 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); 4164 u64_stats_init(&ipvs_tot_stats->syncp); 4165 } 4166 4167 spin_lock_init(&ipvs->tot_stats.lock); 4168 4169 proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops, 4170 sizeof(struct ip_vs_iter)); 4171 proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, 4172 ip_vs_stats_show, NULL); 4173 proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net, 4174 ip_vs_stats_percpu_show, NULL); 4175 4176 if (ip_vs_control_net_init_sysctl(ipvs)) 4177 goto err; 4178 4179 return 0; 4180 4181 err: 4182 free_percpu(ipvs->tot_stats.cpustats); 4183 return -ENOMEM; 4184 } 4185 4186 void __net_exit ip_vs_control_net_cleanup(struct 
					  netns_ipvs *ipvs)
{
	/* Per-netns teardown: reverse of ip_vs_control_net_init() */
	ip_vs_trash_cleanup(ipvs);
	ip_vs_control_net_cleanup_sysctl(ipvs);
	remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
	remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
	remove_proc_entry("ip_vs", ipvs->net->proc_net);
	free_percpu(ipvs->tot_stats.cpustats);
}

/* Register the userspace interfaces: the [gs]etsockopt interface and
 * the Generic Netlink family.  On failure everything already
 * registered is rolled back.
 */
int __init ip_vs_register_nl_ioctl(void)
{
	int ret;

	ret = nf_register_sockopt(&ip_vs_sockopts);
	if (ret) {
		pr_err("cannot register sockopt.\n");
		goto err_sock;
	}

	ret = ip_vs_genl_register();
	if (ret) {
		pr_err("cannot register Generic Netlink interface.\n");
		goto err_genl;
	}
	return 0;

err_genl:
	nf_unregister_sockopt(&ip_vs_sockopts);
err_sock:
	return ret;
}

/* Unregister both userspace interfaces (reverse registration order). */
void ip_vs_unregister_nl_ioctl(void)
{
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
}

/* Module-wide init: service hash tables and the dst/netdevice notifier. */
int __init ip_vs_control_init(void)
{
	int idx;
	int ret;

	EnterFunction(2);

	/* Initialize svc_table, ip_vs_svc_fwm_table */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}

	smp_wmb();	/* Do we really need it now ? */

	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
	if (ret < 0)
		return ret;

	LeaveFunction(2);
	return 0;
}


void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	unregister_netdevice_notifier(&ip_vs_dst_notifier);
	LeaveFunction(2);
}