1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * IPVS An implementation of the IP virtual server support for the 4 * LINUX operating system. IPVS is now implemented as a module 5 * over the NetFilter framework. IPVS can be used to build a 6 * high-performance and highly available server based on a 7 * cluster of servers. 8 * 9 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 10 * Peter Kese <peter.kese@ijs.si> 11 * Julian Anastasov <ja@ssi.bg> 12 * 13 * Changes: 14 */ 15 16 #define KMSG_COMPONENT "IPVS" 17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 18 19 #include <linux/module.h> 20 #include <linux/init.h> 21 #include <linux/types.h> 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/sysctl.h> 25 #include <linux/proc_fs.h> 26 #include <linux/workqueue.h> 27 #include <linux/seq_file.h> 28 #include <linux/slab.h> 29 30 #include <linux/netfilter.h> 31 #include <linux/netfilter_ipv4.h> 32 #include <linux/mutex.h> 33 34 #include <net/net_namespace.h> 35 #include <linux/nsproxy.h> 36 #include <net/ip.h> 37 #ifdef CONFIG_IP_VS_IPV6 38 #include <net/ipv6.h> 39 #include <net/ip6_route.h> 40 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 41 #endif 42 #include <net/route.h> 43 #include <net/sock.h> 44 #include <net/genetlink.h> 45 46 #include <linux/uaccess.h> 47 48 #include <net/ip_vs.h> 49 50 MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME); 51 52 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 53 static DEFINE_MUTEX(__ip_vs_mutex); 54 55 /* sysctl variables */ 56 57 #ifdef CONFIG_IP_VS_DEBUG 58 static int sysctl_ip_vs_debug_level = 0; 59 60 int ip_vs_get_debug_level(void) 61 { 62 return sysctl_ip_vs_debug_level; 63 } 64 #endif 65 66 67 /* Protos */ 68 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); 69 70 71 #ifdef CONFIG_IP_VS_IPV6 72 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? 
*/ 73 static bool __ip_vs_addr_is_local_v6(struct net *net, 74 const struct in6_addr *addr) 75 { 76 struct flowi6 fl6 = { 77 .daddr = *addr, 78 }; 79 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); 80 bool is_local; 81 82 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); 83 84 dst_release(dst); 85 return is_local; 86 } 87 #endif 88 89 #ifdef CONFIG_SYSCTL 90 /* 91 * update_defense_level is called from keventd and from sysctl, 92 * so it needs to protect itself from softirqs 93 */ 94 static void update_defense_level(struct netns_ipvs *ipvs) 95 { 96 struct sysinfo i; 97 int availmem; 98 int nomem; 99 int to_change = -1; 100 101 /* we only count free and buffered memory (in pages) */ 102 si_meminfo(&i); 103 availmem = i.freeram + i.bufferram; 104 /* however in linux 2.5 the i.bufferram is total page cache size, 105 we need adjust it */ 106 /* si_swapinfo(&i); */ 107 /* availmem = availmem - (i.totalswap - i.freeswap); */ 108 109 nomem = (availmem < ipvs->sysctl_amemthresh); 110 111 local_bh_disable(); 112 113 /* drop_entry */ 114 spin_lock(&ipvs->dropentry_lock); 115 switch (ipvs->sysctl_drop_entry) { 116 case 0: 117 atomic_set(&ipvs->dropentry, 0); 118 break; 119 case 1: 120 if (nomem) { 121 atomic_set(&ipvs->dropentry, 1); 122 ipvs->sysctl_drop_entry = 2; 123 } else { 124 atomic_set(&ipvs->dropentry, 0); 125 } 126 break; 127 case 2: 128 if (nomem) { 129 atomic_set(&ipvs->dropentry, 1); 130 } else { 131 atomic_set(&ipvs->dropentry, 0); 132 ipvs->sysctl_drop_entry = 1; 133 } 134 break; 135 case 3: 136 atomic_set(&ipvs->dropentry, 1); 137 break; 138 } 139 spin_unlock(&ipvs->dropentry_lock); 140 141 /* drop_packet */ 142 spin_lock(&ipvs->droppacket_lock); 143 switch (ipvs->sysctl_drop_packet) { 144 case 0: 145 ipvs->drop_rate = 0; 146 break; 147 case 1: 148 if (nomem) { 149 ipvs->drop_rate = ipvs->drop_counter 150 = ipvs->sysctl_amemthresh / 151 (ipvs->sysctl_amemthresh-availmem); 152 ipvs->sysctl_drop_packet = 2; 153 } else { 154 
ipvs->drop_rate = 0; 155 } 156 break; 157 case 2: 158 if (nomem) { 159 ipvs->drop_rate = ipvs->drop_counter 160 = ipvs->sysctl_amemthresh / 161 (ipvs->sysctl_amemthresh-availmem); 162 } else { 163 ipvs->drop_rate = 0; 164 ipvs->sysctl_drop_packet = 1; 165 } 166 break; 167 case 3: 168 ipvs->drop_rate = ipvs->sysctl_am_droprate; 169 break; 170 } 171 spin_unlock(&ipvs->droppacket_lock); 172 173 /* secure_tcp */ 174 spin_lock(&ipvs->securetcp_lock); 175 switch (ipvs->sysctl_secure_tcp) { 176 case 0: 177 if (ipvs->old_secure_tcp >= 2) 178 to_change = 0; 179 break; 180 case 1: 181 if (nomem) { 182 if (ipvs->old_secure_tcp < 2) 183 to_change = 1; 184 ipvs->sysctl_secure_tcp = 2; 185 } else { 186 if (ipvs->old_secure_tcp >= 2) 187 to_change = 0; 188 } 189 break; 190 case 2: 191 if (nomem) { 192 if (ipvs->old_secure_tcp < 2) 193 to_change = 1; 194 } else { 195 if (ipvs->old_secure_tcp >= 2) 196 to_change = 0; 197 ipvs->sysctl_secure_tcp = 1; 198 } 199 break; 200 case 3: 201 if (ipvs->old_secure_tcp < 2) 202 to_change = 1; 203 break; 204 } 205 ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; 206 if (to_change >= 0) 207 ip_vs_protocol_timeout_change(ipvs, 208 ipvs->sysctl_secure_tcp > 1); 209 spin_unlock(&ipvs->securetcp_lock); 210 211 local_bh_enable(); 212 } 213 214 /* Handler for delayed work for expiring no 215 * destination connections 216 */ 217 static void expire_nodest_conn_handler(struct work_struct *work) 218 { 219 struct netns_ipvs *ipvs; 220 221 ipvs = container_of(work, struct netns_ipvs, 222 expire_nodest_conn_work.work); 223 ip_vs_expire_nodest_conn_flush(ipvs); 224 } 225 226 /* 227 * Timer for checking the defense 228 */ 229 #define DEFENSE_TIMER_PERIOD 1*HZ 230 231 static void defense_work_handler(struct work_struct *work) 232 { 233 struct netns_ipvs *ipvs = 234 container_of(work, struct netns_ipvs, defense_work.work); 235 236 update_defense_level(ipvs); 237 if (atomic_read(&ipvs->dropentry)) 238 ip_vs_random_dropentry(ipvs); 239 
queue_delayed_work(system_long_wq, &ipvs->defense_work, 240 DEFENSE_TIMER_PERIOD); 241 } 242 #endif 243 244 int 245 ip_vs_use_count_inc(void) 246 { 247 return try_module_get(THIS_MODULE); 248 } 249 250 void 251 ip_vs_use_count_dec(void) 252 { 253 module_put(THIS_MODULE); 254 } 255 256 257 /* 258 * Hash table: for virtual service lookups 259 */ 260 #define IP_VS_SVC_TAB_BITS 8 261 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 262 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 263 264 /* the service table hashed by <protocol, addr, port> */ 265 static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 266 /* the service table hashed by fwmark */ 267 static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 268 269 270 /* 271 * Returns hash value for virtual service 272 */ 273 static inline unsigned int 274 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, 275 const union nf_inet_addr *addr, __be16 port) 276 { 277 unsigned int porth = ntohs(port); 278 __be32 addr_fold = addr->ip; 279 __u32 ahash; 280 281 #ifdef CONFIG_IP_VS_IPV6 282 if (af == AF_INET6) 283 addr_fold = addr->ip6[0]^addr->ip6[1]^ 284 addr->ip6[2]^addr->ip6[3]; 285 #endif 286 ahash = ntohl(addr_fold); 287 ahash ^= ((size_t) ipvs >> 8); 288 289 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & 290 IP_VS_SVC_TAB_MASK; 291 } 292 293 /* 294 * Returns hash value of fwmark for virtual service lookup 295 */ 296 static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) 297 { 298 return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 299 } 300 301 /* 302 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 303 * or in the ip_vs_svc_fwm_table by fwmark. 304 * Should be called with locked tables. 
305 */ 306 static int ip_vs_svc_hash(struct ip_vs_service *svc) 307 { 308 unsigned int hash; 309 310 if (svc->flags & IP_VS_SVC_F_HASHED) { 311 pr_err("%s(): request for already hashed, called from %pS\n", 312 __func__, __builtin_return_address(0)); 313 return 0; 314 } 315 316 if (svc->fwmark == 0) { 317 /* 318 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 319 */ 320 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, 321 &svc->addr, svc->port); 322 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); 323 } else { 324 /* 325 * Hash it by fwmark in svc_fwm_table 326 */ 327 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); 328 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 329 } 330 331 svc->flags |= IP_VS_SVC_F_HASHED; 332 /* increase its refcnt because it is referenced by the svc table */ 333 atomic_inc(&svc->refcnt); 334 return 1; 335 } 336 337 338 /* 339 * Unhashes a service from svc_table / svc_fwm_table. 340 * Should be called with locked tables. 341 */ 342 static int ip_vs_svc_unhash(struct ip_vs_service *svc) 343 { 344 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 345 pr_err("%s(): request for unhash flagged, called from %pS\n", 346 __func__, __builtin_return_address(0)); 347 return 0; 348 } 349 350 if (svc->fwmark == 0) { 351 /* Remove it from the svc_table table */ 352 hlist_del_rcu(&svc->s_list); 353 } else { 354 /* Remove it from the svc_fwm_table table */ 355 hlist_del_rcu(&svc->f_list); 356 } 357 358 svc->flags &= ~IP_VS_SVC_F_HASHED; 359 atomic_dec(&svc->refcnt); 360 return 1; 361 } 362 363 364 /* 365 * Get service by {netns, proto,addr,port} in the service table. 
366 */ 367 static inline struct ip_vs_service * 368 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, 369 const union nf_inet_addr *vaddr, __be16 vport) 370 { 371 unsigned int hash; 372 struct ip_vs_service *svc; 373 374 /* Check for "full" addressed entries */ 375 hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); 376 377 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { 378 if ((svc->af == af) 379 && ip_vs_addr_equal(af, &svc->addr, vaddr) 380 && (svc->port == vport) 381 && (svc->protocol == protocol) 382 && (svc->ipvs == ipvs)) { 383 /* HIT */ 384 return svc; 385 } 386 } 387 388 return NULL; 389 } 390 391 392 /* 393 * Get service by {fwmark} in the service table. 394 */ 395 static inline struct ip_vs_service * 396 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) 397 { 398 unsigned int hash; 399 struct ip_vs_service *svc; 400 401 /* Check for fwmark addressed entries */ 402 hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); 403 404 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { 405 if (svc->fwmark == fwmark && svc->af == af 406 && (svc->ipvs == ipvs)) { 407 /* HIT */ 408 return svc; 409 } 410 } 411 412 return NULL; 413 } 414 415 /* Find service, called under RCU lock */ 416 struct ip_vs_service * 417 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, 418 const union nf_inet_addr *vaddr, __be16 vport) 419 { 420 struct ip_vs_service *svc; 421 422 /* 423 * Check the table hashed by fwmark first 424 */ 425 if (fwmark) { 426 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); 427 if (svc) 428 goto out; 429 } 430 431 /* 432 * Check the table hashed by <protocol,addr,port> 433 * for "full" addressed entries 434 */ 435 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); 436 437 if (!svc && protocol == IPPROTO_TCP && 438 atomic_read(&ipvs->ftpsvc_counter) && 439 (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) { 440 /* 441 * Check 
if ftp service entry exists, the packet 442 * might belong to FTP data connections. 443 */ 444 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); 445 } 446 447 if (svc == NULL 448 && atomic_read(&ipvs->nullsvc_counter)) { 449 /* 450 * Check if the catch-all port (port zero) exists 451 */ 452 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); 453 } 454 455 out: 456 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 457 fwmark, ip_vs_proto_name(protocol), 458 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 459 svc ? "hit" : "not hit"); 460 461 return svc; 462 } 463 464 465 static inline void 466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 467 { 468 atomic_inc(&svc->refcnt); 469 rcu_assign_pointer(dest->svc, svc); 470 } 471 472 static void ip_vs_service_free(struct ip_vs_service *svc) 473 { 474 free_percpu(svc->stats.cpustats); 475 kfree(svc); 476 } 477 478 static void ip_vs_service_rcu_free(struct rcu_head *head) 479 { 480 struct ip_vs_service *svc; 481 482 svc = container_of(head, struct ip_vs_service, rcu_head); 483 ip_vs_service_free(svc); 484 } 485 486 static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) 487 { 488 if (atomic_dec_and_test(&svc->refcnt)) { 489 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 490 svc->fwmark, 491 IP_VS_DBG_ADDR(svc->af, &svc->addr), 492 ntohs(svc->port)); 493 if (do_delay) 494 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); 495 else 496 ip_vs_service_free(svc); 497 } 498 } 499 500 501 /* 502 * Returns hash value for real service 503 */ 504 static inline unsigned int ip_vs_rs_hashkey(int af, 505 const union nf_inet_addr *addr, 506 __be16 port) 507 { 508 unsigned int porth = ntohs(port); 509 __be32 addr_fold = addr->ip; 510 511 #ifdef CONFIG_IP_VS_IPV6 512 if (af == AF_INET6) 513 addr_fold = addr->ip6[0]^addr->ip6[1]^ 514 addr->ip6[2]^addr->ip6[3]; 515 #endif 516 517 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) 518 & IP_VS_RTAB_MASK; 519 } 520 521 /* Hash 
ip_vs_dest in rs_table by <proto,addr,port>. */ 522 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 523 { 524 unsigned int hash; 525 __be16 port; 526 527 if (dest->in_rs_table) 528 return; 529 530 switch (IP_VS_DFWD_METHOD(dest)) { 531 case IP_VS_CONN_F_MASQ: 532 port = dest->port; 533 break; 534 case IP_VS_CONN_F_TUNNEL: 535 switch (dest->tun_type) { 536 case IP_VS_CONN_F_TUNNEL_TYPE_GUE: 537 port = dest->tun_port; 538 break; 539 case IP_VS_CONN_F_TUNNEL_TYPE_IPIP: 540 case IP_VS_CONN_F_TUNNEL_TYPE_GRE: 541 port = 0; 542 break; 543 default: 544 return; 545 } 546 break; 547 default: 548 return; 549 } 550 551 /* 552 * Hash by proto,addr,port, 553 * which are the parameters of the real service. 554 */ 555 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port); 556 557 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); 558 dest->in_rs_table = 1; 559 } 560 561 /* Unhash ip_vs_dest from rs_table. */ 562 static void ip_vs_rs_unhash(struct ip_vs_dest *dest) 563 { 564 /* 565 * Remove it from the rs_table table. 566 */ 567 if (dest->in_rs_table) { 568 hlist_del_rcu(&dest->d_list); 569 dest->in_rs_table = 0; 570 } 571 } 572 573 /* Check if real service by <proto,addr,port> is present */ 574 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, 575 const union nf_inet_addr *daddr, __be16 dport) 576 { 577 unsigned int hash; 578 struct ip_vs_dest *dest; 579 580 /* Check for "full" addressed entries */ 581 hash = ip_vs_rs_hashkey(af, daddr, dport); 582 583 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 584 if (dest->port == dport && 585 dest->af == af && 586 ip_vs_addr_equal(af, &dest->addr, daddr) && 587 (dest->protocol == protocol || dest->vfwmark) && 588 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 589 /* HIT */ 590 return true; 591 } 592 } 593 594 return false; 595 } 596 597 /* Find real service record by <proto,addr,port>. 
598 * In case of multiple records with the same <proto,addr,port>, only 599 * the first found record is returned. 600 * 601 * To be called under RCU lock. 602 */ 603 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, 604 __u16 protocol, 605 const union nf_inet_addr *daddr, 606 __be16 dport) 607 { 608 unsigned int hash; 609 struct ip_vs_dest *dest; 610 611 /* Check for "full" addressed entries */ 612 hash = ip_vs_rs_hashkey(af, daddr, dport); 613 614 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 615 if (dest->port == dport && 616 dest->af == af && 617 ip_vs_addr_equal(af, &dest->addr, daddr) && 618 (dest->protocol == protocol || dest->vfwmark) && 619 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 620 /* HIT */ 621 return dest; 622 } 623 } 624 625 return NULL; 626 } 627 628 /* Find real service record by <af,addr,tun_port>. 629 * In case of multiple records with the same <af,addr,tun_port>, only 630 * the first found record is returned. 631 * 632 * To be called under RCU lock. 633 */ 634 struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af, 635 const union nf_inet_addr *daddr, 636 __be16 tun_port) 637 { 638 struct ip_vs_dest *dest; 639 unsigned int hash; 640 641 /* Check for "full" addressed entries */ 642 hash = ip_vs_rs_hashkey(af, daddr, tun_port); 643 644 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 645 if (dest->tun_port == tun_port && 646 dest->af == af && 647 ip_vs_addr_equal(af, &dest->addr, daddr) && 648 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) { 649 /* HIT */ 650 return dest; 651 } 652 } 653 654 return NULL; 655 } 656 657 /* Lookup destination by {addr,port} in the given service 658 * Called under RCU lock. 
659 */ 660 static struct ip_vs_dest * 661 ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, 662 const union nf_inet_addr *daddr, __be16 dport) 663 { 664 struct ip_vs_dest *dest; 665 666 /* 667 * Find the destination for the given service 668 */ 669 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 670 if ((dest->af == dest_af) && 671 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 672 (dest->port == dport)) { 673 /* HIT */ 674 return dest; 675 } 676 } 677 678 return NULL; 679 } 680 681 /* 682 * Find destination by {daddr,dport,vaddr,protocol} 683 * Created to be used in ip_vs_process_message() in 684 * the backup synchronization daemon. It finds the 685 * destination to be bound to the received connection 686 * on the backup. 687 * Called under RCU lock, no refcnt is returned. 688 */ 689 struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af, 690 const union nf_inet_addr *daddr, 691 __be16 dport, 692 const union nf_inet_addr *vaddr, 693 __be16 vport, __u16 protocol, __u32 fwmark, 694 __u32 flags) 695 { 696 struct ip_vs_dest *dest; 697 struct ip_vs_service *svc; 698 __be16 port = dport; 699 700 svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport); 701 if (!svc) 702 return NULL; 703 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 704 port = 0; 705 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port); 706 if (!dest) 707 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport); 708 return dest; 709 } 710 711 void ip_vs_dest_dst_rcu_free(struct rcu_head *head) 712 { 713 struct ip_vs_dest_dst *dest_dst = container_of(head, 714 struct ip_vs_dest_dst, 715 rcu_head); 716 717 dst_release(dest_dst->dst_cache); 718 kfree(dest_dst); 719 } 720 721 /* Release dest_dst and dst_cache for dest in user context */ 722 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) 723 { 724 struct ip_vs_dest_dst *old; 725 726 old = rcu_dereference_protected(dest->dest_dst, 1); 727 if (old) { 728 
RCU_INIT_POINTER(dest->dest_dst, NULL); 729 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 730 } 731 } 732 733 /* 734 * Lookup dest by {svc,addr,port} in the destination trash. 735 * The destination trash is used to hold the destinations that are removed 736 * from the service table but are still referenced by some conn entries. 737 * The reason to add the destination trash is when the dest is temporary 738 * down (either by administrator or by monitor program), the dest can be 739 * picked back from the trash, the remaining connections to the dest can 740 * continue, and the counting information of the dest is also useful for 741 * scheduling. 742 */ 743 static struct ip_vs_dest * 744 ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, 745 const union nf_inet_addr *daddr, __be16 dport) 746 { 747 struct ip_vs_dest *dest; 748 struct netns_ipvs *ipvs = svc->ipvs; 749 750 /* 751 * Find the destination in trash 752 */ 753 spin_lock_bh(&ipvs->dest_trash_lock); 754 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 755 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 756 "dest->refcnt=%d\n", 757 dest->vfwmark, 758 IP_VS_DBG_ADDR(dest->af, &dest->addr), 759 ntohs(dest->port), 760 refcount_read(&dest->refcnt)); 761 if (dest->af == dest_af && 762 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 763 dest->port == dport && 764 dest->vfwmark == svc->fwmark && 765 dest->protocol == svc->protocol && 766 (svc->fwmark || 767 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 768 dest->vport == svc->port))) { 769 /* HIT */ 770 list_del(&dest->t_list); 771 goto out; 772 } 773 } 774 775 dest = NULL; 776 777 out: 778 spin_unlock_bh(&ipvs->dest_trash_lock); 779 780 return dest; 781 } 782 783 static void ip_vs_dest_free(struct ip_vs_dest *dest) 784 { 785 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); 786 787 __ip_vs_dst_cache_reset(dest); 788 __ip_vs_svc_put(svc, false); 789 free_percpu(dest->stats.cpustats); 790 
ip_vs_dest_put_and_free(dest); 791 } 792 793 /* 794 * Clean up all the destinations in the trash 795 * Called by the ip_vs_control_cleanup() 796 * 797 * When the ip_vs_control_clearup is activated by ipvs module exit, 798 * the service tables must have been flushed and all the connections 799 * are expired, and the refcnt of each destination in the trash must 800 * be 1, so we simply release them here. 801 */ 802 static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) 803 { 804 struct ip_vs_dest *dest, *nxt; 805 806 del_timer_sync(&ipvs->dest_trash_timer); 807 /* No need to use dest_trash_lock */ 808 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { 809 list_del(&dest->t_list); 810 ip_vs_dest_free(dest); 811 } 812 } 813 814 static void 815 ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) 816 { 817 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c 818 819 spin_lock_bh(&src->lock); 820 821 IP_VS_SHOW_STATS_COUNTER(conns); 822 IP_VS_SHOW_STATS_COUNTER(inpkts); 823 IP_VS_SHOW_STATS_COUNTER(outpkts); 824 IP_VS_SHOW_STATS_COUNTER(inbytes); 825 IP_VS_SHOW_STATS_COUNTER(outbytes); 826 827 ip_vs_read_estimator(dst, src); 828 829 spin_unlock_bh(&src->lock); 830 } 831 832 static void 833 ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) 834 { 835 dst->conns = (u32)src->conns; 836 dst->inpkts = (u32)src->inpkts; 837 dst->outpkts = (u32)src->outpkts; 838 dst->inbytes = src->inbytes; 839 dst->outbytes = src->outbytes; 840 dst->cps = (u32)src->cps; 841 dst->inpps = (u32)src->inpps; 842 dst->outpps = (u32)src->outpps; 843 dst->inbps = (u32)src->inbps; 844 dst->outbps = (u32)src->outbps; 845 } 846 847 static void 848 ip_vs_zero_stats(struct ip_vs_stats *stats) 849 { 850 spin_lock_bh(&stats->lock); 851 852 /* get current counters as zero point, rates are zeroed */ 853 854 #define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c 855 856 IP_VS_ZERO_STATS_COUNTER(conns); 857 
IP_VS_ZERO_STATS_COUNTER(inpkts); 858 IP_VS_ZERO_STATS_COUNTER(outpkts); 859 IP_VS_ZERO_STATS_COUNTER(inbytes); 860 IP_VS_ZERO_STATS_COUNTER(outbytes); 861 862 ip_vs_zero_estimator(stats); 863 864 spin_unlock_bh(&stats->lock); 865 } 866 867 /* 868 * Update a destination in the given service 869 */ 870 static void 871 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 872 struct ip_vs_dest_user_kern *udest, int add) 873 { 874 struct netns_ipvs *ipvs = svc->ipvs; 875 struct ip_vs_service *old_svc; 876 struct ip_vs_scheduler *sched; 877 int conn_flags; 878 879 /* We cannot modify an address and change the address family */ 880 BUG_ON(!add && udest->af != dest->af); 881 882 if (add && udest->af != svc->af) 883 ipvs->mixed_address_family_dests++; 884 885 /* keep the last_weight with latest non-0 weight */ 886 if (add || udest->weight != 0) 887 atomic_set(&dest->last_weight, udest->weight); 888 889 /* set the weight and the flags */ 890 atomic_set(&dest->weight, udest->weight); 891 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 892 conn_flags |= IP_VS_CONN_F_INACTIVE; 893 894 /* Need to rehash? */ 895 if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) != 896 IP_VS_DFWD_METHOD(dest) || 897 udest->tun_type != dest->tun_type || 898 udest->tun_port != dest->tun_port) 899 ip_vs_rs_unhash(dest); 900 901 /* set the tunnel info */ 902 dest->tun_type = udest->tun_type; 903 dest->tun_port = udest->tun_port; 904 dest->tun_flags = udest->tun_flags; 905 906 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 907 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 908 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 909 } else { 910 /* FTP-NAT requires conntrack for mangling */ 911 if (svc->port == FTPPORT) 912 ip_vs_register_conntrack(svc); 913 } 914 atomic_set(&dest->conn_flags, conn_flags); 915 /* Put the real service in rs_table if not present. 
*/ 916 ip_vs_rs_hash(ipvs, dest); 917 918 /* bind the service */ 919 old_svc = rcu_dereference_protected(dest->svc, 1); 920 if (!old_svc) { 921 __ip_vs_bind_svc(dest, svc); 922 } else { 923 if (old_svc != svc) { 924 ip_vs_zero_stats(&dest->stats); 925 __ip_vs_bind_svc(dest, svc); 926 __ip_vs_svc_put(old_svc, true); 927 } 928 } 929 930 /* set the dest status flags */ 931 dest->flags |= IP_VS_DEST_F_AVAILABLE; 932 933 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) 934 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 935 dest->u_threshold = udest->u_threshold; 936 dest->l_threshold = udest->l_threshold; 937 938 dest->af = udest->af; 939 940 spin_lock_bh(&dest->dst_lock); 941 __ip_vs_dst_cache_reset(dest); 942 spin_unlock_bh(&dest->dst_lock); 943 944 if (add) { 945 ip_vs_start_estimator(svc->ipvs, &dest->stats); 946 list_add_rcu(&dest->n_list, &svc->destinations); 947 svc->num_dests++; 948 sched = rcu_dereference_protected(svc->scheduler, 1); 949 if (sched && sched->add_dest) 950 sched->add_dest(svc, dest); 951 } else { 952 sched = rcu_dereference_protected(svc->scheduler, 1); 953 if (sched && sched->upd_dest) 954 sched->upd_dest(svc, dest); 955 } 956 } 957 958 959 /* 960 * Create a destination for the given service 961 */ 962 static int 963 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 964 { 965 struct ip_vs_dest *dest; 966 unsigned int atype, i; 967 968 EnterFunction(2); 969 970 #ifdef CONFIG_IP_VS_IPV6 971 if (udest->af == AF_INET6) { 972 int ret; 973 974 atype = ipv6_addr_type(&udest->addr.in6); 975 if ((!(atype & IPV6_ADDR_UNICAST) || 976 atype & IPV6_ADDR_LINKLOCAL) && 977 !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) 978 return -EINVAL; 979 980 ret = nf_defrag_ipv6_enable(svc->ipvs->net); 981 if (ret) 982 return ret; 983 } else 984 #endif 985 { 986 atype = inet_addr_type(svc->ipvs->net, udest->addr.ip); 987 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 988 return -EINVAL; 989 } 990 991 dest = 
kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); 992 if (dest == NULL) 993 return -ENOMEM; 994 995 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 996 if (!dest->stats.cpustats) 997 goto err_alloc; 998 999 for_each_possible_cpu(i) { 1000 struct ip_vs_cpu_stats *ip_vs_dest_stats; 1001 ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); 1002 u64_stats_init(&ip_vs_dest_stats->syncp); 1003 } 1004 1005 dest->af = udest->af; 1006 dest->protocol = svc->protocol; 1007 dest->vaddr = svc->addr; 1008 dest->vport = svc->port; 1009 dest->vfwmark = svc->fwmark; 1010 ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr); 1011 dest->port = udest->port; 1012 1013 atomic_set(&dest->activeconns, 0); 1014 atomic_set(&dest->inactconns, 0); 1015 atomic_set(&dest->persistconns, 0); 1016 refcount_set(&dest->refcnt, 1); 1017 1018 INIT_HLIST_NODE(&dest->d_list); 1019 spin_lock_init(&dest->dst_lock); 1020 spin_lock_init(&dest->stats.lock); 1021 __ip_vs_update_dest(svc, dest, udest, 1); 1022 1023 LeaveFunction(2); 1024 return 0; 1025 1026 err_alloc: 1027 kfree(dest); 1028 return -ENOMEM; 1029 } 1030 1031 1032 /* 1033 * Add a destination into an existing service 1034 */ 1035 static int 1036 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1037 { 1038 struct ip_vs_dest *dest; 1039 union nf_inet_addr daddr; 1040 __be16 dport = udest->port; 1041 int ret; 1042 1043 EnterFunction(2); 1044 1045 if (udest->weight < 0) { 1046 pr_err("%s(): server weight less than zero\n", __func__); 1047 return -ERANGE; 1048 } 1049 1050 if (udest->l_threshold > udest->u_threshold) { 1051 pr_err("%s(): lower threshold is higher than upper threshold\n", 1052 __func__); 1053 return -ERANGE; 1054 } 1055 1056 if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { 1057 if (udest->tun_port == 0) { 1058 pr_err("%s(): tunnel port is zero\n", __func__); 1059 return -EINVAL; 1060 } 1061 } 1062 1063 ip_vs_addr_copy(udest->af, &daddr, &udest->addr); 1064 1065 /* We use function that 
requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
	rcu_read_unlock();

	if (dest != NULL) {
		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
			      "dest->refcnt=%d, service %u/%s:%u\n",
			      IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
			      refcount_read(&dest->refcnt),
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
			      ntohs(dest->vport));

		/* Revive the trashed dest with the caller's new parameters */
		__ip_vs_update_dest(svc, dest, udest, 1);
		ret = 0;
	} else {
		/*
		 * Allocate and initialize the dest structure
		 */
		ret = ip_vs_new_dest(svc, udest);
	}
	LeaveFunction(2);

	return ret;
}


/*
 *	Edit a destination in the given service.
 *	Validates the user-supplied parameters (weight, thresholds, tunnel
 *	port), then updates the existing dest in place.  Returns 0 on
 *	success, -ERANGE/-EINVAL on bad input, -ENOENT if no such dest.
 */
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	/* GUE tunnels need an explicit encapsulation port */
	if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
		if (udest->tun_port == 0) {
			pr_err("%s(): tunnel port is zero\n", __func__);
			return -EINVAL;
		}
	}

	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
	rcu_read_unlock();

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, udest, 0);
	LeaveFunction(2);

	return 0;
}

/*
 *	Delete a destination (must be already unlinked from the service)
 */
static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
			     bool cleanup)
{
	ip_vs_stop_estimator(ipvs, &dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	ip_vs_rs_unhash(dest);

	spin_lock_bh(&ipvs->dest_trash_lock);
	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
		      refcount_read(&dest->refcnt));
	/* Arm the expire timer only when the trash becomes non-empty;
	 * the cleanup path drains the trash itself and needs no timer.
	 */
	if (list_empty(&ipvs->dest_trash) && !cleanup)
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	/* dest lives in trash with reference */
	list_add(&dest->t_list, &ipvs->dest_trash);
	dest->idle_start = 0;
	spin_unlock_bh(&ipvs->dest_trash_lock);

	/* Queue up delayed work to expire all no destination connections.
	 * No-op when CONFIG_SYSCTL is disabled.
	 */
	if (!cleanup)
		ip_vs_enqueue_expire_nodest_conns(ipvs);
}


/*
 *	Unlink a destination from the given service
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 *  Remove it from the d-linked destination list.
	 */
	list_del_rcu(&dest->n_list);
	svc->num_dests--;

	if (dest->af != svc->af)
		svc->ipvs->mixed_address_family_dests--;

	/* Give the scheduler a chance to drop its per-dest state */
	if (svcupd) {
		struct ip_vs_scheduler *sched;

		sched = rcu_dereference_protected(svc->scheduler, 1);
		if (sched && sched->del_dest)
			sched->del_dest(svc, dest);
	}
}


/*
 *	Delete a destination server in the given service
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	__be16 dport = udest->port;

	EnterFunction(2);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
	rcu_read_unlock();

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	/*
	 *	Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	/*
	 *	Delete the destination
	 */
	__ip_vs_del_dest(svc->ipvs, dest, false);

	LeaveFunction(2);

	return 0;
}

/* Timer callback: free trash entries that have been idle (refcnt == 1)
 * for at least IP_VS_DEST_TRASH_PERIOD.  A dest seen idle for the first
 * time only gets its idle_start stamped; it is freed on a later pass.
 */
static void ip_vs_dest_trash_expire(struct timer_list *t)
{
	struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
	struct ip_vs_dest *dest, *next;
	unsigned long now = jiffies;

	spin_lock(&ipvs->dest_trash_lock);
	list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
		if (refcount_read(&dest->refcnt) > 1)
			continue;
		if (dest->idle_start) {
			if (time_before(now, dest->idle_start +
					     IP_VS_DEST_TRASH_PERIOD))
				continue;
		} else {
			/* max(1UL, now) so idle_start is never 0 ("unset") */
			dest->idle_start = max(1UL, now);
			continue;
		}
		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port));
		list_del(&dest->t_list);
		ip_vs_dest_free(dest);
	}
	if
(!list_empty(&ipvs->dest_trash))
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	spin_unlock(&ipvs->dest_trash_lock);
}

/*
 *	Add a service into the service hash table
 */
static int
ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
		  struct ip_vs_service **svc_p)
{
	int ret = 0, i;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_pe *pe = NULL;
	struct ip_vs_service *svc = NULL;
	int ret_hooks = -1;

	/* increase the module use count */
	if (!ip_vs_use_count_inc())
		return -ENOPROTOOPT;

	/* Lookup the scheduler by 'u->sched_name' */
	if (strcmp(u->sched_name, "none")) {
		sched = ip_vs_scheduler_get(u->sched_name);
		if (!sched) {
			pr_info("Scheduler module ip_vs_%s not found\n",
				u->sched_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6) {
		/* For IPv6 the netmask field carries a prefix length */
		__u32 plen = (__force __u32) u->netmask;

		if (plen < 1 || plen > 128) {
			ret = -EINVAL;
			goto out_err;
		}

		ret = nf_defrag_ipv6_enable(ipvs->net);
		if (ret)
			goto out_err;
	}
#endif

	/* First service of this address family: register netfilter hooks */
	if ((u->af == AF_INET && !ipvs->num_services) ||
	    (u->af == AF_INET6 && !ipvs->num_services6)) {
		ret = ip_vs_register_hooks(ipvs, u->af);
		if (ret < 0)
			goto out_err;
		ret_hooks = ret;
	}

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
	if (svc == NULL) {
		IP_VS_DBG(1, "%s(): no memory\n", __func__);
		ret = -ENOMEM;
		goto out_err;
	}
	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!svc->stats.cpustats) {
		ret = -ENOMEM;
		goto out_err;
	}

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *ip_vs_stats;
		ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
		u64_stats_init(&ip_vs_stats->syncp);
	}


	/* I'm the first user of the service */
	atomic_set(&svc->refcnt, 0);

	svc->af = u->af;
	svc->protocol = u->protocol;
	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
	svc->port = u->port;
	svc->fwmark = u->fwmark;
	svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;
	svc->ipvs = ipvs;

	INIT_LIST_HEAD(&svc->destinations);
	spin_lock_init(&svc->sched_lock);
	spin_lock_init(&svc->stats.lock);

	/* Bind the scheduler */
	if (sched) {
		ret = ip_vs_bind_scheduler(svc, sched);
		if (ret)
			goto out_err;
		sched = NULL;	/* ownership moved to svc */
	}

	/* Bind the ct retriever */
	RCU_INIT_POINTER(svc->pe, pe);
	pe = NULL;	/* ownership moved to svc */

	/* Update the virtual service counters */
	if (svc->port == FTPPORT)
		atomic_inc(&ipvs->ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ipvs->nullsvc_counter);
	if (svc->pe && svc->pe->conn_out)
		atomic_inc(&ipvs->conn_out_counter);

	ip_vs_start_estimator(ipvs, &svc->stats);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ipvs->num_services++;
	else if (svc->af == AF_INET6)
		ipvs->num_services6++;

	/* Hash the service into the service table */
	ip_vs_svc_hash(svc);

	*svc_p = svc;
	/* Now there is a service - full throttle */
	ipvs->enable = 1;
	return 0;


 out_err:
	/* Undo everything acquired above, in reverse order */
	if (ret_hooks >= 0)
		ip_vs_unregister_hooks(ipvs, u->af);
	if (svc != NULL) {
		ip_vs_unbind_scheduler(svc, sched);
		ip_vs_service_free(svc);
	}
	ip_vs_scheduler_put(sched);
	ip_vs_pe_put(pe);

	/* decrease the module
use count */
	ip_vs_use_count_dec();

	return ret;
}


/*
 *	Edit a service and bind it with a new scheduler
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched = NULL, *old_sched;
	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
	int ret = 0;
	bool new_pe_conn_out, old_pe_conn_out;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	if (strcmp(u->sched_name, "none")) {
		sched = ip_vs_scheduler_get(u->sched_name);
		if (!sched) {
			pr_info("Scheduler module ip_vs_%s not found\n",
				u->sched_name);
			return -ENOENT;
		}
	}
	old_sched = sched;

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out;
		}
		old_pe = pe;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6) {
		/* For IPv6 the netmask field carries a prefix length */
		__u32 plen = (__force __u32) u->netmask;

		if (plen < 1 || plen > 128) {
			ret = -EINVAL;
			goto out;
		}
	}
#endif

	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	if (sched != old_sched) {
		if (old_sched) {
			ip_vs_unbind_scheduler(svc, old_sched);
			RCU_INIT_POINTER(svc->scheduler, NULL);
			/* Wait all svc->sched_data users */
			synchronize_rcu();
		}
		/* Bind the new scheduler */
		if (sched) {
			ret = ip_vs_bind_scheduler(svc, sched);
			if (ret) {
				ip_vs_scheduler_put(sched);
				goto out;
			}
		}
	}

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_pe = rcu_dereference_protected(svc->pe, 1);
	if (pe != old_pe) {
		rcu_assign_pointer(svc->pe, pe);
		/* check for optional methods in new pe; keep the
		 * conn_out_counter in sync with pe->conn_out presence
		 */
		new_pe_conn_out = (pe && pe->conn_out) ? true : false;
		old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
		if (new_pe_conn_out && !old_pe_conn_out)
			atomic_inc(&svc->ipvs->conn_out_counter);
		if (old_pe_conn_out && !new_pe_conn_out)
			atomic_dec(&svc->ipvs->conn_out_counter);
	}

out:
	ip_vs_scheduler_put(old_sched);
	ip_vs_pe_put(old_pe);
	return ret;
}

/*
 *	Delete a service from the service list
 *	- The service must be unlinked, unlocked and not referenced!
 *	- We are called under _bh lock
 */
static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
{
	struct ip_vs_dest *dest, *nxt;
	struct ip_vs_scheduler *old_sched;
	struct ip_vs_pe *old_pe;
	struct netns_ipvs *ipvs = svc->ipvs;

	/* Last service of a family: drop the netfilter hooks again */
	if (svc->af == AF_INET) {
		ipvs->num_services--;
		if (!ipvs->num_services)
			ip_vs_unregister_hooks(ipvs, svc->af);
	} else if (svc->af == AF_INET6) {
		ipvs->num_services6--;
		if (!ipvs->num_services6)
			ip_vs_unregister_hooks(ipvs, svc->af);
	}

	ip_vs_stop_estimator(svc->ipvs, &svc->stats);

	/* Unbind scheduler */
	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	ip_vs_unbind_scheduler(svc, old_sched);
	ip_vs_scheduler_put(old_sched);

	/* Unbind persistence engine, keep svc->pe */
	old_pe = rcu_dereference_protected(svc->pe, 1);
	if (old_pe && old_pe->conn_out)
		atomic_dec(&ipvs->conn_out_counter);
	ip_vs_pe_put(old_pe);

	/*
	 *    Unlink the whole destination list
	 */
	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(svc->ipvs, dest, cleanup);
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ipvs->ftpsvc_counter);
	else if (svc->port == 0)
atomic_dec(&ipvs->nullsvc_counter);

	/*
	 *    Free the service if nobody refers to it
	 */
	__ip_vs_svc_put(svc, true);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}

/*
 * Unlink a service from list and try to delete it if its refcnt reached 0
 */
static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{
	ip_vs_unregister_conntrack(svc);
	/* Hold svc to avoid double release from dest_trash */
	atomic_inc(&svc->refcnt);
	/*
	 * Unhash it from the service table
	 */
	ip_vs_svc_unhash(svc);

	__ip_vs_del_service(svc, cleanup);
}

/*
 *	Delete a service from the service list
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	if (svc == NULL)
		return -EEXIST;
	ip_vs_unlink_service(svc, false);

	return 0;
}


/*
 *	Flush all the virtual services
 */
static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
{
	int idx;
	struct ip_vs_service *svc;
	struct hlist_node *n;

	/*
	 * Flush the service table hashed by <netns,protocol,addr,port>
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
					  s_list) {
			if (svc->ipvs == ipvs)
				ip_vs_unlink_service(svc, cleanup);
		}
	}

	/*
	 * Flush the service table hashed by fwmark
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
					  f_list) {
			if (svc->ipvs == ipvs)
				ip_vs_unlink_service(svc, cleanup);
		}
	}

	return 0;
}

/*
 *	Delete service by {netns} in the service table.
 *	Called by __ip_vs_batch_cleanup()
 */
void ip_vs_service_nets_cleanup(struct list_head *net_list)
{
	struct netns_ipvs *ipvs;
	struct net *net;

	EnterFunction(2);
	/* Check for "full" addressed entries */
	mutex_lock(&__ip_vs_mutex);
	list_for_each_entry(net, net_list, exit_list) {
		ipvs = net_ipvs(net);
		ip_vs_flush(ipvs, true);
	}
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
}

/* Put all references for device (dst_cache) */
static inline void
ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
{
	struct ip_vs_dest_dst *dest_dst;

	spin_lock_bh(&dest->dst_lock);
	dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
	if (dest_dst && dest_dst->dst_cache->dev == dev) {
		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
			      dev->name,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      refcount_read(&dest->refcnt));
		__ip_vs_dst_cache_reset(dest);
	}
	spin_unlock_bh(&dest->dst_lock);

}
/* Netdev event receiver
 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
 */
static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	unsigned int idx;

	if (event != NETDEV_DOWN || !ipvs)
		return NOTIFY_DONE;
	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
	EnterFunction(2);
	mutex_lock(&__ip_vs_mutex);
	/* Walk every dest of every service in this netns and drop
	 * cached dsts that point at the device going down.
	 */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (svc->ipvs == ipvs) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					ip_vs_forget_dev(dest, dev);
				}
			}
		}

		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (svc->ipvs == ipvs) {
				list_for_each_entry(dest, &svc->destinations,
						    n_list) {
					ip_vs_forget_dev(dest, dev);
				}
			}

		}
	}

	/* Trashed dests may also hold a cached dst for this device */
	spin_lock_bh(&ipvs->dest_trash_lock);
	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
		ip_vs_forget_dev(dest, dev);
	}
	spin_unlock_bh(&ipvs->dest_trash_lock);
	mutex_unlock(&__ip_vs_mutex);
	LeaveFunction(2);
	return NOTIFY_DONE;
}

/*
 *	Zero counters in a service or all services
 */
static int ip_vs_zero_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest;

	list_for_each_entry(dest, &svc->destinations, n_list) {
		ip_vs_zero_stats(&dest->stats);
	}
	ip_vs_zero_stats(&svc->stats);
	return 0;
}

/* Zero the counters of every service (both hash tables) in this netns */
static int ip_vs_zero_all(struct netns_ipvs *ipvs)
{
	int idx;
	struct ip_vs_service *svc;

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			if (svc->ipvs == ipvs)
				ip_vs_zero_service(svc);
		}
	}

	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (svc->ipvs == ipvs)
				ip_vs_zero_service(svc);
		}
	}

	ip_vs_zero_stats(&ipvs->tot_stats);
	return 0;
}

#ifdef CONFIG_SYSCTL

/* sysctl handler: accept only defense modes 0..3 and, on change,
 * recompute the defense level immediately.
 */
static int
proc_do_defense_mode(struct ctl_table *table, int write,
		     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct netns_ipvs *ipvs = table->extra2;
	int *valp = table->data;
	int val = *valp;
	int rc;

	/* Parse into a local copy so a bad value never hits *valp */
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = sizeof(int),
		.mode = table->mode,
	};

	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write && (*valp != val)) {
		if (val < 0 || val > 3) {
			rc = -EINVAL;
		} else {
			*valp = val;
			update_defense_level(ipvs);
		}
	}
	return rc;
}

/* sysctl handler: sync_threshold is an int pair; require both
 * non-negative and threshold < period (unless period is 0).
 */
static int
proc_do_sync_threshold(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val[2];
	int rc;
	struct ctl_table tmp = {
		.data = &val,
		.maxlen = table->maxlen,
		.mode = table->mode,
	};

	memcpy(val, valp, sizeof(val));
	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write) {
		if (val[0] < 0 || val[1] < 0 ||
		    (val[0] >= val[1] && val[1]))
			rc = -EINVAL;
		else
			memcpy(valp, val, sizeof(val));
	}
	return rc;
}

/* sysctl handler: sync_ports must be a power of two, >= 1 */
static int
proc_do_sync_ports(struct ctl_table *table, int write,
		   void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = table->data;
	int val = *valp;
	int rc;

	struct ctl_table tmp = {
		.data = &val,
		.maxlen = sizeof(int),
		.mode = table->mode,
	};

	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
	if (write && (*valp != val)) {
		if (val < 1 || !is_power_of_2(val))
			rc = -EINVAL;
		else
			*valp = val;
	}
	return rc;
}

/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change order or insert new entries without
 *	align with netns init in ip_vs_control_net_init()
 */

static struct ctl_table vs_vars[] = {
	{
		.procname	= "amemthresh",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "am_droprate",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "drop_entry",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "drop_packet",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
#ifdef CONFIG_IP_VS_NFCT
	{
		.procname	= "conntrack",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.procname	= "secure_tcp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "snat_reroute",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.procname	= "sync_version",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "sync_ports",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_sync_ports,
	},
	{
		.procname	= "sync_persist_mode",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sync_qlen_max",
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "sync_sock_size",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cache_bypass",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_nodest_conn",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sloppy_tcp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sloppy_sctp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_quiescent_template",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sync_threshold",
		.maxlen		=
			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
		.mode		= 0644,
		.proc_handler	= proc_do_sync_threshold,
	},
	{
		.procname	= "sync_refresh_period",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "sync_retries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_THREE,
	},
	{
		.procname	= "nat_icmp_send",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "pmtu_disc",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "backup_only",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "conn_reuse_mode",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "schedule_icmp",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "ignore_tunneled",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "run_estimation",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	{
		/* debug_level is global, not per-netns, hence has .data */
		.procname	= "debug_level",
		.data		= &sysctl_ip_vs_debug_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{ }
};

#endif

#ifdef CONFIG_PROC_FS

/* Cursor state for the /proc seq_file walk over both service tables */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct hlist_head *table;  /* table currently being walked */
	int bucket;                /* current bucket in that table */
};

/*
 *	Write the contents of the VS rule table to a PROCfs file.
 *	(It is kept just for backward compatibility)
 */
static inline const char *ip_vs_fwd_name(unsigned int flags)
{
	switch (flags & IP_VS_CONN_F_FWD_MASK) {
	case IP_VS_CONN_F_LOCALNODE:
		return "Local";
	case IP_VS_CONN_F_TUNNEL:
		return "Tunnel";
	case IP_VS_CONN_F_DROUTE:
		return "Route";
	default:
		return "Masq";
	}
}


/* Get the Nth entry in the two lists */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
			if ((svc->ipvs == ipvs) && pos-- == 0) {
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
					 f_list) {
			if ((svc->ipvs == ipvs) && pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	return NULL;
}

/* seq_file start: takes the RCU read lock held until ..._seq_stop() */
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}


static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct hlist_node *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		e = rcu_dereference(hlist_next_rcu(&svc->s_list));
		if (e)
			return hlist_entry(e, struct ip_vs_service, s_list);

		/* end of bucket: scan remaining buckets for a first entry */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			hlist_for_each_entry_rcu(svc,
						 &ip_vs_svc_table[iter->bucket],
						 s_list) {
				return svc;
			}
		}

		/* protocol table exhausted: switch to the fwmark table */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	e = rcu_dereference(hlist_next_rcu(&svc->f_list));
	if (e)
		return hlist_entry(e, struct ip_vs_service, f_list);

scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		hlist_for_each_entry_rcu(svc,
					 &ip_vs_svc_fwm_table[iter->bucket],
					 f_list)
			return svc;
	}

	return NULL;
}

static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}


/* Emit one service (with its destinations) per show call */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		struct net *net = seq_file_net(seq);
		struct netns_ipvs *ipvs = net_ipvs(net);
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;
		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
		char *sched_name = sched ? sched->name : "none";

		/* Both tables are global; skip entries of other netns */
		if (svc->ipvs != ipvs)
			return 0;
		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s [%pI6]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   &svc->addr.in6,
					   ntohs(svc->port),
					   sched_name);
			else
#endif
				seq_printf(seq, "%s %08X:%04X %s %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   sched_name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		} else {
			seq_printf(seq, "FWM %08X %s %s",
				   svc->fwmark, sched_name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
		}

		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   " -> [%pI6]:%04X"
					   " %-7s %-6d %-10d %-10d\n",
					   &dest->addr.in6,
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   " -> %08X:%04X "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));

		}
	}
	return 0;
}

static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
/* /proc: aggregated totals and rates for this netns */
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);
	struct ip_vs_kstats show;

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Total Incoming Outgoing Incoming Outgoing\n");
	seq_puts(seq,
		 " Conns Packets Packets Bytes Bytes\n");

	ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
	seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
		   (unsigned long long)show.conns,
		   (unsigned long long)show.inpkts,
		   (unsigned long long)show.outpkts,
		   (unsigned long long)show.inbytes,
		   (unsigned long long)show.outbytes);

/*                01234567 01234567 01234567 0123456701234567 0123456701234567*/
	seq_puts(seq,
		 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
	seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
		   (unsigned long long)show.cps,
		   (unsigned long long)show.inpps,
		   (unsigned long long)show.outpps,
		   (unsigned long long)show.inbps,
		   (unsigned long long)show.outbps);

	return 0;
}

/* /proc: per-CPU counters plus the aggregated totals and rates */
static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);
	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
	struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
	struct ip_vs_kstats kstats;
	int i;

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Total Incoming Outgoing Incoming Outgoing\n");
	seq_puts(seq,
		 "CPU Conns Packets Packets Bytes Bytes\n");

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
		unsigned int start;
		u64 conns, inpkts, outpkts, inbytes, outbytes;

		/* u64_stats seqcount loop: retry on concurrent update */
		do {
			start = u64_stats_fetch_begin_irq(&u->syncp);
			conns = u->cnt.conns;
			inpkts = u->cnt.inpkts;
			outpkts = u->cnt.outpkts;
			inbytes = u->cnt.inbytes;
			outbytes = u->cnt.outbytes;
		} while (u64_stats_fetch_retry_irq(&u->syncp, start));

		seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
			   i, (u64)conns, (u64)inpkts,
			   (u64)outpkts, (u64)inbytes,
			   (u64)outbytes);
	}

	ip_vs_copy_stats(&kstats, tot_stats);

	seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
		   (unsigned long long)kstats.conns,
		   (unsigned long long)kstats.inpkts,
		   (unsigned long long)kstats.outpkts,
		   (unsigned long long)kstats.inbytes,
		   (unsigned long long)kstats.outbytes);

/*                ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
	seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
		   kstats.cps,
		   kstats.inpps,
		   kstats.outpps,
		   kstats.inbps,
		   kstats.outbps);

	return 0;
}
#endif

/*
 *	Set timeout values for tcp tcpfin udp in the timeout_table.
 */
static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
	struct ip_vs_proto_data *pd;
#endif

	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
		  u->tcp_timeout,
		  u->tcp_fin_timeout,
		  u->udp_timeout);

#ifdef CONFIG_IP_VS_PROTO_TCP
	/* Reject values that would overflow when scaled by HZ */
	if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
	    u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
		return -EINVAL;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
		return -EINVAL;
#endif

#ifdef CONFIG_IP_VS_PROTO_TCP
	/* A zero value means "leave this timeout unchanged" */
	if (u->tcp_timeout) {
		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
			= u->tcp_timeout * HZ;
	}

	if (u->tcp_fin_timeout) {
		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
			= u->tcp_fin_timeout * HZ;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout) {
		pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
		pd->timeout_table[IP_VS_UDP_S_NORMAL]
			= u->udp_timeout * HZ;
	}
#endif
	return 0;
}

/* Map a [gs]etsockopt command to an index into the arglen tables */
#define CMDID(cmd)		(cmd - IP_VS_BASE_CTL)

struct ip_vs_svcdest_user {
	struct ip_vs_service_user	s;
	struct ip_vs_dest_user		d;
};

/* Expected argument length for each SET command, used to validate the
 * user-supplied optlen before copying.
 */
static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
	[CMDID(IP_VS_SO_SET_ADD)]         = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_EDIT)]        = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_DEL)]         = sizeof(struct ip_vs_service_user),
	[CMDID(IP_VS_SO_SET_ADDDEST)]     = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_DELDEST)]     = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_EDITDEST)]    = sizeof(struct ip_vs_svcdest_user),
	[CMDID(IP_VS_SO_SET_TIMEOUT)]     = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_STOPDAEMON)]  = sizeof(struct ip_vs_daemon_user),
	[CMDID(IP_VS_SO_SET_ZERO)]        = sizeof(struct ip_vs_service_user),
};

/* Union sized to the largest SET argument, for the on-stack copy buffer */
union ip_vs_set_arglen {
	struct ip_vs_service_user	field_IP_VS_SO_SET_ADD;
	struct ip_vs_service_user	field_IP_VS_SO_SET_EDIT;
	struct ip_vs_service_user	field_IP_VS_SO_SET_DEL;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_ADDDEST;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_DELDEST;
	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_EDITDEST;
	struct ip_vs_timeout_user	field_IP_VS_SO_SET_TIMEOUT;
	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STARTDAEMON;
	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STOPDAEMON;
	struct ip_vs_service_user	field_IP_VS_SO_SET_ZERO;
};

#define MAX_SET_ARGLEN	sizeof(union ip_vs_set_arglen)

/* Convert the legacy (IPv4-only) sockopt service struct to the kernel one */
static void
ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
		       struct ip_vs_service_user *usvc_compat)
{
	memset(usvc, 0, sizeof(*usvc));

	usvc->af		= AF_INET;
	usvc->protocol		= usvc_compat->protocol;
	usvc->addr.ip		= usvc_compat->addr;
	usvc->port		= usvc_compat->port;
	usvc->fwmark		= usvc_compat->fwmark;

	/* Deep copy of sched_name is not needed here */
	usvc->sched_name	= usvc_compat->sched_name;

	usvc->flags		= usvc_compat->flags;
	usvc->timeout		= usvc_compat->timeout;
	usvc->netmask		= usvc_compat->netmask;
}

/* Convert the legacy (IPv4-only) sockopt dest struct to the kernel one */
static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
				    struct ip_vs_dest_user *udest_compat)
{
	memset(udest, 0, sizeof(*udest));

	udest->addr.ip		= udest_compat->addr;
	udest->port		= udest_compat->port;
	udest->conn_flags	= udest_compat->conn_flags;
	udest->weight		= udest_compat->weight;
	udest->u_threshold	= udest_compat->u_threshold;
	udest->l_threshold	= udest_compat->l_threshold;
	udest->af		= AF_INET;
	udest->tun_type		= IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
}

/* Legacy setsockopt entry point for the IPVS control commands */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
{
	struct net *net = sock_net(sk);
	int ret;
	unsigned char arg[MAX_SET_ARGLEN];
	struct ip_vs_service_user *usvc_compat;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest_compat;
	struct ip_vs_dest_user_kern udest;
	struct netns_ipvs *ipvs = net_ipvs(net);

	BUILD_BUG_ON(sizeof(arg) > 255);
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
		return -EINVAL;
	/* optlen must match the command's expected struct size exactly */
	if (len != set_arglen[CMDID(cmd)]) {
		IP_VS_DBG(1, "set_ctl: len %u != %u\n",
			  len, set_arglen[CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_sockptr(arg, ptr, len) != 0)
return -EFAULT; 2482 2483 /* Handle daemons since they have another lock */ 2484 if (cmd == IP_VS_SO_SET_STARTDAEMON || 2485 cmd == IP_VS_SO_SET_STOPDAEMON) { 2486 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2487 2488 if (cmd == IP_VS_SO_SET_STARTDAEMON) { 2489 struct ipvs_sync_daemon_cfg cfg; 2490 2491 memset(&cfg, 0, sizeof(cfg)); 2492 ret = -EINVAL; 2493 if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, 2494 sizeof(cfg.mcast_ifn)) <= 0) 2495 return ret; 2496 cfg.syncid = dm->syncid; 2497 ret = start_sync_thread(ipvs, &cfg, dm->state); 2498 } else { 2499 ret = stop_sync_thread(ipvs, dm->state); 2500 } 2501 return ret; 2502 } 2503 2504 mutex_lock(&__ip_vs_mutex); 2505 if (cmd == IP_VS_SO_SET_FLUSH) { 2506 /* Flush the virtual service */ 2507 ret = ip_vs_flush(ipvs, false); 2508 goto out_unlock; 2509 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2510 /* Set timeout values for (tcp tcpfin udp) */ 2511 ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg); 2512 goto out_unlock; 2513 } else if (!len) { 2514 /* No more commands with len == 0 below */ 2515 ret = -EINVAL; 2516 goto out_unlock; 2517 } 2518 2519 usvc_compat = (struct ip_vs_service_user *)arg; 2520 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); 2521 2522 /* We only use the new structs internally, so copy userspace compat 2523 * structs to extended internal versions */ 2524 ip_vs_copy_usvc_compat(&usvc, usvc_compat); 2525 ip_vs_copy_udest_compat(&udest, udest_compat); 2526 2527 if (cmd == IP_VS_SO_SET_ZERO) { 2528 /* if no service address is set, zero counters in all */ 2529 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2530 ret = ip_vs_zero_all(ipvs); 2531 goto out_unlock; 2532 } 2533 } 2534 2535 if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) && 2536 strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) == 2537 IP_VS_SCHEDNAME_MAXLEN) { 2538 ret = -EINVAL; 2539 goto out_unlock; 2540 } 2541 2542 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ 
2543 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && 2544 usvc.protocol != IPPROTO_SCTP) { 2545 pr_err("set_ctl: invalid protocol: %d %pI4:%d\n", 2546 usvc.protocol, &usvc.addr.ip, 2547 ntohs(usvc.port)); 2548 ret = -EFAULT; 2549 goto out_unlock; 2550 } 2551 2552 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2553 rcu_read_lock(); 2554 if (usvc.fwmark == 0) 2555 svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol, 2556 &usvc.addr, usvc.port); 2557 else 2558 svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark); 2559 rcu_read_unlock(); 2560 2561 if (cmd != IP_VS_SO_SET_ADD 2562 && (svc == NULL || svc->protocol != usvc.protocol)) { 2563 ret = -ESRCH; 2564 goto out_unlock; 2565 } 2566 2567 switch (cmd) { 2568 case IP_VS_SO_SET_ADD: 2569 if (svc != NULL) 2570 ret = -EEXIST; 2571 else 2572 ret = ip_vs_add_service(ipvs, &usvc, &svc); 2573 break; 2574 case IP_VS_SO_SET_EDIT: 2575 ret = ip_vs_edit_service(svc, &usvc); 2576 break; 2577 case IP_VS_SO_SET_DEL: 2578 ret = ip_vs_del_service(svc); 2579 if (!ret) 2580 goto out_unlock; 2581 break; 2582 case IP_VS_SO_SET_ZERO: 2583 ret = ip_vs_zero_service(svc); 2584 break; 2585 case IP_VS_SO_SET_ADDDEST: 2586 ret = ip_vs_add_dest(svc, &udest); 2587 break; 2588 case IP_VS_SO_SET_EDITDEST: 2589 ret = ip_vs_edit_dest(svc, &udest); 2590 break; 2591 case IP_VS_SO_SET_DELDEST: 2592 ret = ip_vs_del_dest(svc, &udest); 2593 } 2594 2595 out_unlock: 2596 mutex_unlock(&__ip_vs_mutex); 2597 return ret; 2598 } 2599 2600 2601 static void 2602 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2603 { 2604 struct ip_vs_scheduler *sched; 2605 struct ip_vs_kstats kstats; 2606 char *sched_name; 2607 2608 sched = rcu_dereference_protected(src->scheduler, 1); 2609 sched_name = sched ? 
sched->name : "none"; 2610 dst->protocol = src->protocol; 2611 dst->addr = src->addr.ip; 2612 dst->port = src->port; 2613 dst->fwmark = src->fwmark; 2614 strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); 2615 dst->flags = src->flags; 2616 dst->timeout = src->timeout / HZ; 2617 dst->netmask = src->netmask; 2618 dst->num_dests = src->num_dests; 2619 ip_vs_copy_stats(&kstats, &src->stats); 2620 ip_vs_export_stats_user(&dst->stats, &kstats); 2621 } 2622 2623 static inline int 2624 __ip_vs_get_service_entries(struct netns_ipvs *ipvs, 2625 const struct ip_vs_get_services *get, 2626 struct ip_vs_get_services __user *uptr) 2627 { 2628 int idx, count=0; 2629 struct ip_vs_service *svc; 2630 struct ip_vs_service_entry entry; 2631 int ret = 0; 2632 2633 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2634 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2635 /* Only expose IPv4 entries to old interface */ 2636 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2637 continue; 2638 2639 if (count >= get->num_services) 2640 goto out; 2641 memset(&entry, 0, sizeof(entry)); 2642 ip_vs_copy_service(&entry, svc); 2643 if (copy_to_user(&uptr->entrytable[count], 2644 &entry, sizeof(entry))) { 2645 ret = -EFAULT; 2646 goto out; 2647 } 2648 count++; 2649 } 2650 } 2651 2652 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2653 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2654 /* Only expose IPv4 entries to old interface */ 2655 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2656 continue; 2657 2658 if (count >= get->num_services) 2659 goto out; 2660 memset(&entry, 0, sizeof(entry)); 2661 ip_vs_copy_service(&entry, svc); 2662 if (copy_to_user(&uptr->entrytable[count], 2663 &entry, sizeof(entry))) { 2664 ret = -EFAULT; 2665 goto out; 2666 } 2667 count++; 2668 } 2669 } 2670 out: 2671 return ret; 2672 } 2673 2674 static inline int 2675 __ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get, 2676 struct ip_vs_get_dests __user 
*uptr) 2677 { 2678 struct ip_vs_service *svc; 2679 union nf_inet_addr addr = { .ip = get->addr }; 2680 int ret = 0; 2681 2682 rcu_read_lock(); 2683 if (get->fwmark) 2684 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark); 2685 else 2686 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr, 2687 get->port); 2688 rcu_read_unlock(); 2689 2690 if (svc) { 2691 int count = 0; 2692 struct ip_vs_dest *dest; 2693 struct ip_vs_dest_entry entry; 2694 struct ip_vs_kstats kstats; 2695 2696 memset(&entry, 0, sizeof(entry)); 2697 list_for_each_entry(dest, &svc->destinations, n_list) { 2698 if (count >= get->num_dests) 2699 break; 2700 2701 /* Cannot expose heterogeneous members via sockopt 2702 * interface 2703 */ 2704 if (dest->af != svc->af) 2705 continue; 2706 2707 entry.addr = dest->addr.ip; 2708 entry.port = dest->port; 2709 entry.conn_flags = atomic_read(&dest->conn_flags); 2710 entry.weight = atomic_read(&dest->weight); 2711 entry.u_threshold = dest->u_threshold; 2712 entry.l_threshold = dest->l_threshold; 2713 entry.activeconns = atomic_read(&dest->activeconns); 2714 entry.inactconns = atomic_read(&dest->inactconns); 2715 entry.persistconns = atomic_read(&dest->persistconns); 2716 ip_vs_copy_stats(&kstats, &dest->stats); 2717 ip_vs_export_stats_user(&entry.stats, &kstats); 2718 if (copy_to_user(&uptr->entrytable[count], 2719 &entry, sizeof(entry))) { 2720 ret = -EFAULT; 2721 break; 2722 } 2723 count++; 2724 } 2725 } else 2726 ret = -ESRCH; 2727 return ret; 2728 } 2729 2730 static inline void 2731 __ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2732 { 2733 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2734 struct ip_vs_proto_data *pd; 2735 #endif 2736 2737 memset(u, 0, sizeof (*u)); 2738 2739 #ifdef CONFIG_IP_VS_PROTO_TCP 2740 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2741 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2742 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 
/ HZ; 2743 #endif 2744 #ifdef CONFIG_IP_VS_PROTO_UDP 2745 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); 2746 u->udp_timeout = 2747 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2748 #endif 2749 } 2750 2751 static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = { 2752 [CMDID(IP_VS_SO_GET_VERSION)] = 64, 2753 [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo), 2754 [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services), 2755 [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry), 2756 [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests), 2757 [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2758 [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user), 2759 }; 2760 2761 union ip_vs_get_arglen { 2762 char field_IP_VS_SO_GET_VERSION[64]; 2763 struct ip_vs_getinfo field_IP_VS_SO_GET_INFO; 2764 struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES; 2765 struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE; 2766 struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS; 2767 struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT; 2768 struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2]; 2769 }; 2770 2771 #define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen) 2772 2773 static int 2774 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 2775 { 2776 unsigned char arg[MAX_GET_ARGLEN]; 2777 int ret = 0; 2778 unsigned int copylen; 2779 struct net *net = sock_net(sk); 2780 struct netns_ipvs *ipvs = net_ipvs(net); 2781 2782 BUG_ON(!net); 2783 BUILD_BUG_ON(sizeof(arg) > 255); 2784 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2785 return -EPERM; 2786 2787 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX) 2788 return -EINVAL; 2789 2790 copylen = get_arglen[CMDID(cmd)]; 2791 if (*len < (int) copylen) { 2792 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen); 2793 return -EINVAL; 2794 } 2795 2796 if (copy_from_user(arg, user, copylen) != 0) 2797 return -EFAULT; 2798 /* 2799 * 
Handle daemons first since it has its own locking 2800 */ 2801 if (cmd == IP_VS_SO_GET_DAEMON) { 2802 struct ip_vs_daemon_user d[2]; 2803 2804 memset(&d, 0, sizeof(d)); 2805 mutex_lock(&ipvs->sync_mutex); 2806 if (ipvs->sync_state & IP_VS_STATE_MASTER) { 2807 d[0].state = IP_VS_STATE_MASTER; 2808 strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, 2809 sizeof(d[0].mcast_ifn)); 2810 d[0].syncid = ipvs->mcfg.syncid; 2811 } 2812 if (ipvs->sync_state & IP_VS_STATE_BACKUP) { 2813 d[1].state = IP_VS_STATE_BACKUP; 2814 strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, 2815 sizeof(d[1].mcast_ifn)); 2816 d[1].syncid = ipvs->bcfg.syncid; 2817 } 2818 if (copy_to_user(user, &d, sizeof(d)) != 0) 2819 ret = -EFAULT; 2820 mutex_unlock(&ipvs->sync_mutex); 2821 return ret; 2822 } 2823 2824 mutex_lock(&__ip_vs_mutex); 2825 switch (cmd) { 2826 case IP_VS_SO_GET_VERSION: 2827 { 2828 char buf[64]; 2829 2830 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2831 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2832 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2833 ret = -EFAULT; 2834 goto out; 2835 } 2836 *len = strlen(buf)+1; 2837 } 2838 break; 2839 2840 case IP_VS_SO_GET_INFO: 2841 { 2842 struct ip_vs_getinfo info; 2843 info.version = IP_VS_VERSION_CODE; 2844 info.size = ip_vs_conn_tab_size; 2845 info.num_services = ipvs->num_services; 2846 if (copy_to_user(user, &info, sizeof(info)) != 0) 2847 ret = -EFAULT; 2848 } 2849 break; 2850 2851 case IP_VS_SO_GET_SERVICES: 2852 { 2853 struct ip_vs_get_services *get; 2854 int size; 2855 2856 get = (struct ip_vs_get_services *)arg; 2857 size = struct_size(get, entrytable, get->num_services); 2858 if (*len != size) { 2859 pr_err("length: %u != %u\n", *len, size); 2860 ret = -EINVAL; 2861 goto out; 2862 } 2863 ret = __ip_vs_get_service_entries(ipvs, get, user); 2864 } 2865 break; 2866 2867 case IP_VS_SO_GET_SERVICE: 2868 { 2869 struct ip_vs_service_entry *entry; 2870 struct ip_vs_service *svc; 2871 union nf_inet_addr addr; 2872 2873 
entry = (struct ip_vs_service_entry *)arg; 2874 addr.ip = entry->addr; 2875 rcu_read_lock(); 2876 if (entry->fwmark) 2877 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark); 2878 else 2879 svc = __ip_vs_service_find(ipvs, AF_INET, 2880 entry->protocol, &addr, 2881 entry->port); 2882 rcu_read_unlock(); 2883 if (svc) { 2884 ip_vs_copy_service(entry, svc); 2885 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2886 ret = -EFAULT; 2887 } else 2888 ret = -ESRCH; 2889 } 2890 break; 2891 2892 case IP_VS_SO_GET_DESTS: 2893 { 2894 struct ip_vs_get_dests *get; 2895 int size; 2896 2897 get = (struct ip_vs_get_dests *)arg; 2898 size = struct_size(get, entrytable, get->num_dests); 2899 if (*len != size) { 2900 pr_err("length: %u != %u\n", *len, size); 2901 ret = -EINVAL; 2902 goto out; 2903 } 2904 ret = __ip_vs_get_dest_entries(ipvs, get, user); 2905 } 2906 break; 2907 2908 case IP_VS_SO_GET_TIMEOUT: 2909 { 2910 struct ip_vs_timeout_user t; 2911 2912 __ip_vs_get_timeouts(ipvs, &t); 2913 if (copy_to_user(user, &t, sizeof(t)) != 0) 2914 ret = -EFAULT; 2915 } 2916 break; 2917 2918 default: 2919 ret = -EINVAL; 2920 } 2921 2922 out: 2923 mutex_unlock(&__ip_vs_mutex); 2924 return ret; 2925 } 2926 2927 2928 static struct nf_sockopt_ops ip_vs_sockopts = { 2929 .pf = PF_INET, 2930 .set_optmin = IP_VS_BASE_CTL, 2931 .set_optmax = IP_VS_SO_SET_MAX+1, 2932 .set = do_ip_vs_set_ctl, 2933 .get_optmin = IP_VS_BASE_CTL, 2934 .get_optmax = IP_VS_SO_GET_MAX+1, 2935 .get = do_ip_vs_get_ctl, 2936 .owner = THIS_MODULE, 2937 }; 2938 2939 /* 2940 * Generic Netlink interface 2941 */ 2942 2943 /* IPVS genetlink family */ 2944 static struct genl_family ip_vs_genl_family; 2945 2946 /* Policy used for first-level command attributes */ 2947 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2948 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, 2949 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, 2950 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, 2951 
[IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, 2952 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, 2953 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, 2954 }; 2955 2956 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ 2957 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { 2958 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, 2959 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, 2960 .len = IP_VS_IFNAME_MAXLEN - 1 }, 2961 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, 2962 [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 }, 2963 [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 }, 2964 [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) }, 2965 [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 }, 2966 [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 }, 2967 }; 2968 2969 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ 2970 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { 2971 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, 2972 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, 2973 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, 2974 .len = sizeof(union nf_inet_addr) }, 2975 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, 2976 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2977 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2978 .len = IP_VS_SCHEDNAME_MAXLEN - 1 }, 2979 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, 2980 .len = IP_VS_PENAME_MAXLEN }, 2981 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2982 .len = sizeof(struct ip_vs_flags) }, 2983 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2984 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, 2985 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, 2986 }; 2987 2988 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ 2989 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { 2990 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, 2991 
.len = sizeof(union nf_inet_addr) }, 2992 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, 2993 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, 2994 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, 2995 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, 2996 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, 2997 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, 2998 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, 2999 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, 3000 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, 3001 [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, 3002 [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 }, 3003 [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 }, 3004 [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 }, 3005 }; 3006 3007 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 3008 struct ip_vs_kstats *kstats) 3009 { 3010 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); 3011 3012 if (!nl_stats) 3013 return -EMSGSIZE; 3014 3015 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || 3016 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || 3017 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || 3018 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 3019 IPVS_STATS_ATTR_PAD) || 3020 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 3021 IPVS_STATS_ATTR_PAD) || 3022 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || 3023 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || 3024 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || 3025 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) || 3026 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps)) 3027 goto nla_put_failure; 3028 nla_nest_end(skb, nl_stats); 3029 3030 return 0; 3031 3032 nla_put_failure: 3033 nla_nest_cancel(skb, nl_stats); 3034 return -EMSGSIZE; 3035 } 3036 3037 static int ip_vs_genl_fill_stats64(struct sk_buff 
*skb, int container_type, 3038 struct ip_vs_kstats *kstats) 3039 { 3040 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); 3041 3042 if (!nl_stats) 3043 return -EMSGSIZE; 3044 3045 if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns, 3046 IPVS_STATS_ATTR_PAD) || 3047 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts, 3048 IPVS_STATS_ATTR_PAD) || 3049 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts, 3050 IPVS_STATS_ATTR_PAD) || 3051 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 3052 IPVS_STATS_ATTR_PAD) || 3053 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 3054 IPVS_STATS_ATTR_PAD) || 3055 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps, 3056 IPVS_STATS_ATTR_PAD) || 3057 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps, 3058 IPVS_STATS_ATTR_PAD) || 3059 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps, 3060 IPVS_STATS_ATTR_PAD) || 3061 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps, 3062 IPVS_STATS_ATTR_PAD) || 3063 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps, 3064 IPVS_STATS_ATTR_PAD)) 3065 goto nla_put_failure; 3066 nla_nest_end(skb, nl_stats); 3067 3068 return 0; 3069 3070 nla_put_failure: 3071 nla_nest_cancel(skb, nl_stats); 3072 return -EMSGSIZE; 3073 } 3074 3075 static int ip_vs_genl_fill_service(struct sk_buff *skb, 3076 struct ip_vs_service *svc) 3077 { 3078 struct ip_vs_scheduler *sched; 3079 struct ip_vs_pe *pe; 3080 struct nlattr *nl_service; 3081 struct ip_vs_flags flags = { .flags = svc->flags, 3082 .mask = ~0 }; 3083 struct ip_vs_kstats kstats; 3084 char *sched_name; 3085 3086 nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE); 3087 if (!nl_service) 3088 return -EMSGSIZE; 3089 3090 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af)) 3091 goto nla_put_failure; 3092 if (svc->fwmark) { 3093 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark)) 3094 goto nla_put_failure; 3095 } else { 
3096 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || 3097 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || 3098 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) 3099 goto nla_put_failure; 3100 } 3101 3102 sched = rcu_dereference_protected(svc->scheduler, 1); 3103 sched_name = sched ? sched->name : "none"; 3104 pe = rcu_dereference_protected(svc->pe, 1); 3105 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || 3106 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || 3107 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 3108 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || 3109 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) 3110 goto nla_put_failure; 3111 ip_vs_copy_stats(&kstats, &svc->stats); 3112 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats)) 3113 goto nla_put_failure; 3114 if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats)) 3115 goto nla_put_failure; 3116 3117 nla_nest_end(skb, nl_service); 3118 3119 return 0; 3120 3121 nla_put_failure: 3122 nla_nest_cancel(skb, nl_service); 3123 return -EMSGSIZE; 3124 } 3125 3126 static int ip_vs_genl_dump_service(struct sk_buff *skb, 3127 struct ip_vs_service *svc, 3128 struct netlink_callback *cb) 3129 { 3130 void *hdr; 3131 3132 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3133 &ip_vs_genl_family, NLM_F_MULTI, 3134 IPVS_CMD_NEW_SERVICE); 3135 if (!hdr) 3136 return -EMSGSIZE; 3137 3138 if (ip_vs_genl_fill_service(skb, svc) < 0) 3139 goto nla_put_failure; 3140 3141 genlmsg_end(skb, hdr); 3142 return 0; 3143 3144 nla_put_failure: 3145 genlmsg_cancel(skb, hdr); 3146 return -EMSGSIZE; 3147 } 3148 3149 static int ip_vs_genl_dump_services(struct sk_buff *skb, 3150 struct netlink_callback *cb) 3151 { 3152 int idx = 0, i; 3153 int start = cb->args[0]; 3154 struct ip_vs_service *svc; 3155 struct net *net = sock_net(skb->sk); 3156 struct netns_ipvs *ipvs = net_ipvs(net); 3157 3158 
mutex_lock(&__ip_vs_mutex); 3159 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 3160 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 3161 if (++idx <= start || (svc->ipvs != ipvs)) 3162 continue; 3163 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 3164 idx--; 3165 goto nla_put_failure; 3166 } 3167 } 3168 } 3169 3170 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 3171 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 3172 if (++idx <= start || (svc->ipvs != ipvs)) 3173 continue; 3174 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 3175 idx--; 3176 goto nla_put_failure; 3177 } 3178 } 3179 } 3180 3181 nla_put_failure: 3182 mutex_unlock(&__ip_vs_mutex); 3183 cb->args[0] = idx; 3184 3185 return skb->len; 3186 } 3187 3188 static bool ip_vs_is_af_valid(int af) 3189 { 3190 if (af == AF_INET) 3191 return true; 3192 #ifdef CONFIG_IP_VS_IPV6 3193 if (af == AF_INET6 && ipv6_mod_enabled()) 3194 return true; 3195 #endif 3196 return false; 3197 } 3198 3199 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, 3200 struct ip_vs_service_user_kern *usvc, 3201 struct nlattr *nla, bool full_entry, 3202 struct ip_vs_service **ret_svc) 3203 { 3204 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 3205 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 3206 struct ip_vs_service *svc; 3207 3208 /* Parse mandatory identifying service fields first */ 3209 if (nla == NULL || 3210 nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL)) 3211 return -EINVAL; 3212 3213 nla_af = attrs[IPVS_SVC_ATTR_AF]; 3214 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; 3215 nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; 3216 nla_port = attrs[IPVS_SVC_ATTR_PORT]; 3217 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; 3218 3219 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) 3220 return -EINVAL; 3221 3222 memset(usvc, 0, sizeof(*usvc)); 3223 3224 usvc->af = nla_get_u16(nla_af); 3225 if (!ip_vs_is_af_valid(usvc->af)) 3226 return 
-EAFNOSUPPORT; 3227 3228 if (nla_fwmark) { 3229 usvc->protocol = IPPROTO_TCP; 3230 usvc->fwmark = nla_get_u32(nla_fwmark); 3231 } else { 3232 usvc->protocol = nla_get_u16(nla_protocol); 3233 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); 3234 usvc->port = nla_get_be16(nla_port); 3235 usvc->fwmark = 0; 3236 } 3237 3238 rcu_read_lock(); 3239 if (usvc->fwmark) 3240 svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark); 3241 else 3242 svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol, 3243 &usvc->addr, usvc->port); 3244 rcu_read_unlock(); 3245 *ret_svc = svc; 3246 3247 /* If a full entry was requested, check for the additional fields */ 3248 if (full_entry) { 3249 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout, 3250 *nla_netmask; 3251 struct ip_vs_flags flags; 3252 3253 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 3254 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME]; 3255 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 3256 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 3257 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 3258 3259 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) 3260 return -EINVAL; 3261 3262 nla_memcpy(&flags, nla_flags, sizeof(flags)); 3263 3264 /* prefill flags from service if it already exists */ 3265 if (svc) 3266 usvc->flags = svc->flags; 3267 3268 /* set new flags from userland */ 3269 usvc->flags = (usvc->flags & ~flags.mask) | 3270 (flags.flags & flags.mask); 3271 usvc->sched_name = nla_data(nla_sched); 3272 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; 3273 usvc->timeout = nla_get_u32(nla_timeout); 3274 usvc->netmask = nla_get_be32(nla_netmask); 3275 } 3276 3277 return 0; 3278 } 3279 3280 static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs, 3281 struct nlattr *nla) 3282 { 3283 struct ip_vs_service_user_kern usvc; 3284 struct ip_vs_service *svc; 3285 int ret; 3286 3287 ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc); 3288 return ret ? 
ERR_PTR(ret) : svc; 3289 } 3290 3291 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 3292 { 3293 struct nlattr *nl_dest; 3294 struct ip_vs_kstats kstats; 3295 3296 nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST); 3297 if (!nl_dest) 3298 return -EMSGSIZE; 3299 3300 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || 3301 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || 3302 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, 3303 (atomic_read(&dest->conn_flags) & 3304 IP_VS_CONN_F_FWD_MASK)) || 3305 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, 3306 atomic_read(&dest->weight)) || 3307 nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE, 3308 dest->tun_type) || 3309 nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT, 3310 dest->tun_port) || 3311 nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS, 3312 dest->tun_flags) || 3313 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || 3314 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || 3315 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, 3316 atomic_read(&dest->activeconns)) || 3317 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, 3318 atomic_read(&dest->inactconns)) || 3319 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, 3320 atomic_read(&dest->persistconns)) || 3321 nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) 3322 goto nla_put_failure; 3323 ip_vs_copy_stats(&kstats, &dest->stats); 3324 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats)) 3325 goto nla_put_failure; 3326 if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats)) 3327 goto nla_put_failure; 3328 3329 nla_nest_end(skb, nl_dest); 3330 3331 return 0; 3332 3333 nla_put_failure: 3334 nla_nest_cancel(skb, nl_dest); 3335 return -EMSGSIZE; 3336 } 3337 3338 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, 3339 struct netlink_callback *cb) 3340 { 3341 void *hdr; 3342 3343 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3344 &ip_vs_genl_family, 
NLM_F_MULTI, 3345 IPVS_CMD_NEW_DEST); 3346 if (!hdr) 3347 return -EMSGSIZE; 3348 3349 if (ip_vs_genl_fill_dest(skb, dest) < 0) 3350 goto nla_put_failure; 3351 3352 genlmsg_end(skb, hdr); 3353 return 0; 3354 3355 nla_put_failure: 3356 genlmsg_cancel(skb, hdr); 3357 return -EMSGSIZE; 3358 } 3359 3360 static int ip_vs_genl_dump_dests(struct sk_buff *skb, 3361 struct netlink_callback *cb) 3362 { 3363 int idx = 0; 3364 int start = cb->args[0]; 3365 struct ip_vs_service *svc; 3366 struct ip_vs_dest *dest; 3367 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 3368 struct net *net = sock_net(skb->sk); 3369 struct netns_ipvs *ipvs = net_ipvs(net); 3370 3371 mutex_lock(&__ip_vs_mutex); 3372 3373 /* Try to find the service for which to dump destinations */ 3374 if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) 3375 goto out_err; 3376 3377 3378 svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); 3379 if (IS_ERR_OR_NULL(svc)) 3380 goto out_err; 3381 3382 /* Dump the destinations */ 3383 list_for_each_entry(dest, &svc->destinations, n_list) { 3384 if (++idx <= start) 3385 continue; 3386 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { 3387 idx--; 3388 goto nla_put_failure; 3389 } 3390 } 3391 3392 nla_put_failure: 3393 cb->args[0] = idx; 3394 3395 out_err: 3396 mutex_unlock(&__ip_vs_mutex); 3397 3398 return skb->len; 3399 } 3400 3401 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, 3402 struct nlattr *nla, bool full_entry) 3403 { 3404 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; 3405 struct nlattr *nla_addr, *nla_port; 3406 struct nlattr *nla_addr_family; 3407 3408 /* Parse mandatory identifying destination fields first */ 3409 if (nla == NULL || 3410 nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL)) 3411 return -EINVAL; 3412 3413 nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; 3414 nla_port = attrs[IPVS_DEST_ATTR_PORT]; 3415 nla_addr_family = 
	attrs[IPVS_DEST_ATTR_ADDR_FAMILY];

	/* Address and port are mandatory for every dest command */
	if (!(nla_addr && nla_port))
		return -EINVAL;

	memset(udest, 0, sizeof(*udest));

	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
	udest->port = nla_get_be16(nla_port);

	/* af == 0 means "unspecified"; the caller substitutes the service
	 * address family in that case (see ip_vs_genl_set_cmd()).
	 */
	if (nla_addr_family)
		udest->af = nla_get_u16(nla_addr_family);
	else
		udest->af = 0;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
			      *nla_l_thresh, *nla_tun_type, *nla_tun_port,
			      *nla_tun_flags;

		nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
		nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
		nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
		nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
		nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
		nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
		nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS];

		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
			return -EINVAL;

		/* Only the forwarding-method bits of conn_flags are taken
		 * from userspace here.
		 */
		udest->conn_flags = nla_get_u32(nla_fwd)
				    & IP_VS_CONN_F_FWD_MASK;
		udest->weight = nla_get_u32(nla_weight);
		udest->u_threshold = nla_get_u32(nla_u_thresh);
		udest->l_threshold = nla_get_u32(nla_l_thresh);

		/* Tunnel parameters are optional even for a full entry */
		if (nla_tun_type)
			udest->tun_type = nla_get_u8(nla_tun_type);

		if (nla_tun_port)
			udest->tun_port = nla_get_be16(nla_tun_port);

		if (nla_tun_flags)
			udest->tun_flags = nla_get_u16(nla_tun_flags);
	}

	return 0;
}

/* Put one sync daemon's state and configuration into a nested
 * IPVS_CMD_ATTR_DAEMON attribute.  Returns 0 on success or -EMSGSIZE
 * (with the nest cancelled) if the skb ran out of room.
 */
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
				  struct ipvs_sync_daemon_cfg *c)
{
	struct nlattr *nl_daemon;

	nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON);
	if (!nl_daemon)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
	    nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
	    nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
	    nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
		goto nla_put_failure;
#ifdef CONFIG_IP_VS_IPV6
	if (c->mcast_af == AF_INET6) {
		if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
				     &c->mcast_group.in6))
			goto nla_put_failure;
	} else
#endif
		/* mcast_af may also be 0 (no multicast group configured) */
		if (c->mcast_af == AF_INET &&
		    nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
				    c->mcast_group.ip))
			goto nla_put_failure;
	nla_nest_end(skb, nl_daemon);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_daemon);
	return -EMSGSIZE;
}

/* Emit one NLM_F_MULTI message describing a sync daemon as part of a
 * dump.  Returns 0 on success, -EMSGSIZE if the message did not fit.
 */
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
				  struct ipvs_sync_daemon_cfg *c,
				  struct netlink_callback *cb)
{
	void *hdr;
	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family, NLM_F_MULTI,
			  IPVS_CMD_NEW_DAEMON);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_daemon(skb, state, c))
		goto nla_put_failure;

	genlmsg_end(skb, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

/* Dump handler for IPVS_CMD_GET_DAEMON: emits up to two records (the
 * master and the backup daemon).  cb->args[0] / cb->args[1] remember
 * which records have already been sent across dump continuations.
 */
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&ipvs->sync_mutex);
	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
					   &ipvs->mcfg, cb) < 0)
			goto nla_put_failure;

		cb->args[0] = 1;
	}

	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
					   &ipvs->bcfg, cb) < 0)
			goto nla_put_failure;

		cb->args[1] = 1;
	}

	/* A put failure just ends this skb; the dump resumes later */
nla_put_failure:
	mutex_unlock(&ipvs->sync_mutex);

	return skb->len;
}

/* Parse IPVS_DAEMON_ATTR_* attributes and start a sync thread.
 * STATE, MCAST_IFN and SYNC_ID are mandatory; multicast group, port,
 * TTL and sync_maxlen are optional overrides (0 = use defaults).
 */
static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
	struct ipvs_sync_daemon_cfg c;
	struct nlattr *a;
	int ret;

	memset(&c, 0, sizeof(c));
	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
		return -EINVAL;
	/* NOTE(review): strlcpy is deprecated in-kernel in favour of
	 * strscpy — candidate for a tree-wide cleanup, not changed here.
	 */
	strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
		sizeof(c.mcast_ifn));
	c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);

	a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
	if (a)
		c.sync_maxlen = nla_get_u16(a);

	a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
	if (a) {
		c.mcast_af = AF_INET;
		c.mcast_group.ip = nla_get_in_addr(a);
		if (!ipv4_is_multicast(c.mcast_group.ip))
			return -EINVAL;
	} else {
		a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
		if (a) {
#ifdef CONFIG_IP_VS_IPV6
			int addr_type;

			c.mcast_af = AF_INET6;
			c.mcast_group.in6 = nla_get_in6_addr(a);
			addr_type = ipv6_addr_type(&c.mcast_group.in6);
			if (!(addr_type & IPV6_ADDR_MULTICAST))
				return -EINVAL;
#else
			return -EAFNOSUPPORT;
#endif
		}
	}

	a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
	if (a)
		c.mcast_port = nla_get_u16(a);

	a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
	if (a)
		c.mcast_ttl = nla_get_u8(a);

	/* The synchronization protocol is incompatible with mixed family
	 * services
	 */
	if (ipvs->mixed_address_family_dests > 0)
		return -EINVAL;

	ret = start_sync_thread(ipvs, &c,
				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
	return ret;
}

/* Stop the master or backup sync thread selected by
 * IPVS_DAEMON_ATTR_STATE.
 */
static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
	int ret;

	if (!attrs[IPVS_DAEMON_ATTR_STATE])
		return -EINVAL;

	ret = stop_sync_thread(ipvs,
			       nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
	return ret;
}

/* Apply IPVS_CMD_SET_CONFIG: update only the timeout values that were
 * supplied, keeping the current values for the rest.
 */
static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
	struct ip_vs_timeout_user t;

	/* Start from the current timeouts so absent attrs are no-ops */
	__ip_vs_get_timeouts(ipvs, &t);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
		t.tcp_fin_timeout =
			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);

	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);

	return ip_vs_set_timeout(ipvs, &t);
}

/* doit handler for IPVS_CMD_NEW_DAEMON / IPVS_CMD_DEL_DAEMON.  The
 * daemon parameters arrive in a nested IPVS_CMD_ATTR_DAEMON attribute
 * which is re-parsed here against ip_vs_daemon_policy.
 */
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
	int ret = -EINVAL, cmd;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];

		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
		    nla_parse_nested_deprecated(daemon_attrs,
						IPVS_DAEMON_ATTR_MAX,
						info->attrs[IPVS_CMD_ATTR_DAEMON],
						ip_vs_daemon_policy,
						info->extack))
			goto out;

		if (cmd == IPVS_CMD_NEW_DAEMON)
			ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
		else
			ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
	}

out:
	return ret;
}

/* doit handler for all state-changing IPVS commands (service / dest /
 * zero / flush / set_config).  All modifications are serialized by
 * __ip_vs_mutex.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
	bool need_full_svc = false, need_full_dest = false;
	struct ip_vs_service *svc = NULL;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_dest_user_kern udest;
	int ret = 0, cmd;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	mutex_lock(&__ip_vs_mutex);

	/* Commands that do not take a service argument are handled first */
	if (cmd == IPVS_CMD_FLUSH) {
		ret = ip_vs_flush(ipvs, false);
		goto out;
	} else if (cmd == IPVS_CMD_SET_CONFIG) {
		ret = ip_vs_genl_set_config(ipvs, info->attrs);
		goto out;
	} else if (cmd == IPVS_CMD_ZERO &&
		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		ret = ip_vs_zero_all(ipvs);
		goto out;
	}

	/* All following commands require a service argument, so check if we
	 * received a valid one. We need a full service specification when
	 * adding / editing a service. Only identifying members otherwise. */
	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
		need_full_svc = true;

	ret = ip_vs_genl_parse_service(ipvs, &usvc,
				       info->attrs[IPVS_CMD_ATTR_SERVICE],
				       need_full_svc, &svc);
	if (ret)
		goto out;

	/* Unless we're adding a new service, the service must already exist */
	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
		ret = -ESRCH;
		goto out;
	}

	/* Destination commands require a valid destination argument. For
	 * adding / editing a destination, we need a full destination
	 * specification. */
	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
	    cmd == IPVS_CMD_DEL_DEST) {
		if (cmd != IPVS_CMD_DEL_DEST)
			need_full_dest = true;

		ret = ip_vs_genl_parse_dest(&udest,
					    info->attrs[IPVS_CMD_ATTR_DEST],
					    need_full_dest);
		if (ret)
			goto out;

		/* Old protocols did not allow the user to specify address
		 * family, so we set it to zero instead.  We also didn't
		 * allow heterogeneous pools in the old code, so it's safe
		 * to assume that this will have the same address family as
		 * the service.
		 */
		if (udest.af == 0)
			udest.af = svc->af;

		if (!ip_vs_is_af_valid(udest.af)) {
			ret = -EAFNOSUPPORT;
			goto out;
		}

		if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
			/* The synchronization protocol is incompatible
			 * with mixed family services
			 */
			if (ipvs->sync_state) {
				ret = -EINVAL;
				goto out;
			}

			/* Which connection types do we support? */
			switch (udest.conn_flags) {
			case IP_VS_CONN_F_TUNNEL:
				/* We are able to forward this */
				break;
			default:
				ret = -EINVAL;
				goto out;
			}
		}
	}

	switch (cmd) {
	case IPVS_CMD_NEW_SERVICE:
		if (svc == NULL)
			ret = ip_vs_add_service(ipvs, &usvc, &svc);
		else
			ret = -EEXIST;
		break;
	case IPVS_CMD_SET_SERVICE:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IPVS_CMD_DEL_SERVICE:
		ret = ip_vs_del_service(svc);
		/* do not use svc, it can be freed */
		break;
	case IPVS_CMD_NEW_DEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IPVS_CMD_SET_DEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IPVS_CMD_DEL_DEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	case IPVS_CMD_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	default:
		ret = -EINVAL;
	}

out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}

/* doit handler for the GET commands answered with a single reply
 * message (GET_SERVICE, GET_CONFIG, GET_INFO).  Multi-record dumps are
 * handled by the dumpit callbacks instead.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *reply;
	int ret, cmd, reply_cmd;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	cmd = info->genlhdr->cmd;

	if (cmd == IPVS_CMD_GET_SERVICE)
		reply_cmd = IPVS_CMD_NEW_SERVICE;
	else if (cmd == IPVS_CMD_GET_INFO)
		reply_cmd = IPVS_CMD_SET_INFO;
	else if (cmd == IPVS_CMD_GET_CONFIG)
		reply_cmd = IPVS_CMD_SET_CONFIG;
	else {
		pr_err("unknown Generic Netlink command\n");
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	mutex_lock(&__ip_vs_mutex);

	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
	if (reply == NULL)
		goto nla_put_failure;

	switch (cmd) {
	case IPVS_CMD_GET_SERVICE:
	{
		struct ip_vs_service *svc;

		svc = ip_vs_genl_find_service(ipvs,
					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
		if (IS_ERR(svc)) {
			ret = PTR_ERR(svc);
			goto out_err;
		} else if (svc) {
			ret = ip_vs_genl_fill_service(msg, svc);
			if (ret)
				goto nla_put_failure;
		} else {
			ret = -ESRCH;
			goto out_err;
		}

		break;
	}

	case IPVS_CMD_GET_CONFIG:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(ipvs, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
				t.tcp_timeout) ||
		    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
				t.tcp_fin_timeout))
			goto nla_put_failure;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
			goto nla_put_failure;
#endif

		break;
	}

	case IPVS_CMD_GET_INFO:
		if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
				IP_VS_VERSION_CODE) ||
		    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
				ip_vs_conn_tab_size))
			goto nla_put_failure;
		break;
	}

	genlmsg_end(msg, reply);
	ret = genlmsg_reply(msg, info);
	goto out;

nla_put_failure:
	pr_err("not enough space in Netlink message\n");
	ret = -EMSGSIZE;

out_err:
	nlmsg_free(msg);
out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}


/* Generic Netlink command table.  Every command requires
 * GENL_ADMIN_PERM; state changes go through ip_vs_genl_set_cmd /
 * ip_vs_genl_set_daemon, reads through ip_vs_genl_get_cmd or a dumpit.
 */
static const struct genl_small_ops ip_vs_genl_ops[] = {
	{
		.cmd = IPVS_CMD_NEW_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT |
			    GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_SET_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_DEL_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_GET_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
		.dumpit = ip_vs_genl_dump_services,
	},
	{
		.cmd = IPVS_CMD_NEW_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_SET_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_DEL_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		/* GET_DEST is dump-only: no single-reply doit handler */
		.cmd = IPVS_CMD_GET_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.dumpit = ip_vs_genl_dump_dests,
	},
	{
		.cmd = IPVS_CMD_NEW_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_daemon,
	},
	{
		.cmd = IPVS_CMD_DEL_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_daemon,
	},
	{
		/* GET_DAEMON is dump-only as well */
		.cmd = IPVS_CMD_GET_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.dumpit = ip_vs_genl_dump_daemons,
	},
	{
		.cmd = IPVS_CMD_SET_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_GET_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
	},
	{
		.cmd = IPVS_CMD_GET_INFO,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_get_cmd,
	},
	{
		.cmd = IPVS_CMD_ZERO,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
	{
		.cmd = IPVS_CMD_FLUSH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags = GENL_ADMIN_PERM,
		.doit = ip_vs_genl_set_cmd,
	},
};

static struct genl_family ip_vs_genl_family __ro_after_init = {
	.hdrsize = 0,
	.name = IPVS_GENL_NAME,
	.version = IPVS_GENL_VERSION,
	.maxattr = IPVS_CMD_ATTR_MAX,
	.policy = ip_vs_cmd_policy,
	.netnsok = true, /* Make ipvsadm to work on netns */
	.module = THIS_MODULE,
	.small_ops = ip_vs_genl_ops,
	.n_small_ops = ARRAY_SIZE(ip_vs_genl_ops),
};

static int __init ip_vs_genl_register(void)
{
	return genl_register_family(&ip_vs_genl_family);
}

static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}

/* End of Generic Netlink interface definitions */

/*
 * per netns init/exit func.
 */
#ifdef CONFIG_SYSCTL
/* Create the per-netns "net/ipv4/vs" sysctl table, seed the tunables
 * with their defaults and kick off the defense timer.  The tbl[idx++]
 * assignments below are positional: they must stay in the same order as
 * the corresponding entries in vs_vars[].
 */
static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;
	int idx;
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	if (!net_eq(net, &init_net)) {
		/* Non-init netns gets a private, writable copy of the table */
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	} else
		tbl = vs_vars;
	/* Initialize sysctl defaults */
	for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
		if (tbl[idx].proc_handler == proc_do_defense_mode)
			tbl[idx].extra2 = ipvs;
	}
	idx = 0;
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
#endif
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	ipvs->sysctl_sync_ports = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ports;
	tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
	ipvs->sysctl_sync_sock_size = 0;
	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
	tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	/* sync_threshold is a two-int vector, so maxlen is set too */
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
	tbl[idx++].data = &ipvs->sysctl_sync_retries;
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
	ipvs->sysctl_pmtu_disc = 1;
	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
	tbl[idx++].data = &ipvs->sysctl_backup_only;
	ipvs->sysctl_conn_reuse_mode = 1;
	tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
	tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
	tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
	ipvs->sysctl_run_estimation = 1;
	tbl[idx++].data = &ipvs->sysctl_run_estimation;
#ifdef CONFIG_IP_VS_DEBUG
	/* Global sysctls must be ro in non-init netns */
	if (!net_eq(net, &init_net))
		tbl[idx++].mode = 0444;
#endif

	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
	if (ipvs->sysctl_hdr == NULL) {
		if (!net_eq(net, &init_net))
			kfree(tbl);
		return -ENOMEM;
	}
	ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
	ipvs->sysctl_tbl = tbl;
	/* Schedule defense work */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	queue_delayed_work(system_long_wq, &ipvs->defense_work,
			   DEFENSE_TIMER_PERIOD);

	/* Init delayed work for expiring no dest conn */
	INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work,
			  expire_nodest_conn_handler);

	return 0;
}

/* Tear down everything ip_vs_control_net_init_sysctl() set up */
static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;

	cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work);
	/* defense_work re-queues itself, so cancel both the delayed work
	 * and a possibly already-queued work item
	 */
	cancel_delayed_work_sync(&ipvs->defense_work);
	cancel_work_sync(&ipvs->defense_work.work);
	unregister_net_sysctl_table(ipvs->sysctl_hdr);
	ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);

	/* Only non-init netns got a private table copy to free */
	if (!net_eq(net, &init_net))
		kfree(ipvs->sysctl_tbl);
}

#else

/* No-op stubs when sysctl support is compiled out */
static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }

#endif

static struct notifier_block ip_vs_dst_notifier = {
	.notifier_call = ip_vs_dst_event,
#ifdef CONFIG_IP_VS_IPV6
	.priority = ADDRCONF_NOTIFY_PRIORITY + 5,
#endif
};

/* Per-netns initialization of the control plane: real-server hash
 * table, dest trash list, stats counters, procfs entries and sysctls.
 */
int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
{
	int i, idx;

	/* Initialize rs_table */
	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
		INIT_HLIST_HEAD(&ipvs->rs_table[idx]);

	INIT_LIST_HEAD(&ipvs->dest_trash);
	spin_lock_init(&ipvs->dest_trash_lock);
	timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
	atomic_set(&ipvs->ftpsvc_counter, 0);
	atomic_set(&ipvs->nullsvc_counter, 0);
	atomic_set(&ipvs->conn_out_counter, 0);

	/* procfs stats */
	ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!ipvs->tot_stats.cpustats)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *ipvs_tot_stats;
		ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
		u64_stats_init(&ipvs_tot_stats->syncp);
	}

	spin_lock_init(&ipvs->tot_stats.lock);

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net,
			     &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)))
		goto err_vs;
	if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
				    ip_vs_stats_show, NULL))
		goto err_stats;
	if
	    (!proc_create_net_single("ip_vs_stats_percpu", 0,
				     ipvs->net->proc_net,
				     ip_vs_stats_percpu_show, NULL))
		goto err_percpu;
#endif

	if (ip_vs_control_net_init_sysctl(ipvs))
		goto err;

	return 0;

	/* Error unwinding: each label removes the entries created before
	 * the corresponding failure point, in reverse order.
	 */
err:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);

err_percpu:
	remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);

err_stats:
	remove_proc_entry("ip_vs", ipvs->net->proc_net);

err_vs:
#endif
	free_percpu(ipvs->tot_stats.cpustats);
	return -ENOMEM;
}

/* Per-netns teardown: mirror image of ip_vs_control_net_init() */
void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
{
	ip_vs_trash_cleanup(ipvs);
	ip_vs_control_net_cleanup_sysctl(ipvs);
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
	remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
	remove_proc_entry("ip_vs", ipvs->net->proc_net);
#endif
	free_percpu(ipvs->tot_stats.cpustats);
}

/* Register the [gs]etsockopt interface and the Generic Netlink family.
 * On any failure everything registered so far is rolled back.
 */
int __init ip_vs_register_nl_ioctl(void)
{
	int ret;

	ret = nf_register_sockopt(&ip_vs_sockopts);
	if (ret) {
		pr_err("cannot register sockopt.\n");
		goto err_sock;
	}

	ret = ip_vs_genl_register();
	if (ret) {
		pr_err("cannot register Generic Netlink interface.\n");
		goto err_genl;
	}
	return 0;

err_genl:
	nf_unregister_sockopt(&ip_vs_sockopts);
err_sock:
	return ret;
}

void ip_vs_unregister_nl_ioctl(void)
{
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
}

/* Module-global init: service hash tables and the dst notifier */
int __init ip_vs_control_init(void)
{
	int idx;
	int ret;

	EnterFunction(2);

	/* Initialize svc_table, ip_vs_svc_fwm_table */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}

	smp_wmb();	/* Do we really need it now ? */

	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
	if (ret < 0)
		return ret;

	LeaveFunction(2);
	return 0;
}


void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	unregister_netdevice_notifier(&ip_vs_dst_notifier);
	LeaveFunction(2);
}