1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * IPVS An implementation of the IP virtual server support for the 4 * LINUX operating system. IPVS is now implemented as a module 5 * over the NetFilter framework. IPVS can be used to build a 6 * high-performance and highly available server based on a 7 * cluster of servers. 8 * 9 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 10 * Peter Kese <peter.kese@ijs.si> 11 * Julian Anastasov <ja@ssi.bg> 12 * 13 * Changes: 14 */ 15 16 #define KMSG_COMPONENT "IPVS" 17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 18 19 #include <linux/module.h> 20 #include <linux/init.h> 21 #include <linux/types.h> 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/sysctl.h> 25 #include <linux/proc_fs.h> 26 #include <linux/workqueue.h> 27 #include <linux/seq_file.h> 28 #include <linux/slab.h> 29 30 #include <linux/netfilter.h> 31 #include <linux/netfilter_ipv4.h> 32 #include <linux/mutex.h> 33 34 #include <net/net_namespace.h> 35 #include <linux/nsproxy.h> 36 #include <net/ip.h> 37 #ifdef CONFIG_IP_VS_IPV6 38 #include <net/ipv6.h> 39 #include <net/ip6_route.h> 40 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 41 #endif 42 #include <net/route.h> 43 #include <net/sock.h> 44 #include <net/genetlink.h> 45 46 #include <linux/uaccess.h> 47 48 #include <net/ip_vs.h> 49 50 MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME); 51 52 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 53 static DEFINE_MUTEX(__ip_vs_mutex); 54 55 /* sysctl variables */ 56 57 #ifdef CONFIG_IP_VS_DEBUG 58 static int sysctl_ip_vs_debug_level = 0; 59 60 int ip_vs_get_debug_level(void) 61 { 62 return sysctl_ip_vs_debug_level; 63 } 64 #endif 65 66 67 /* Protos */ 68 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); 69 70 71 #ifdef CONFIG_IP_VS_IPV6 72 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? 
*/ 73 static bool __ip_vs_addr_is_local_v6(struct net *net, 74 const struct in6_addr *addr) 75 { 76 struct flowi6 fl6 = { 77 .daddr = *addr, 78 }; 79 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); 80 bool is_local; 81 82 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); 83 84 dst_release(dst); 85 return is_local; 86 } 87 #endif 88 89 #ifdef CONFIG_SYSCTL 90 /* 91 * update_defense_level is called from keventd and from sysctl, 92 * so it needs to protect itself from softirqs 93 */ 94 static void update_defense_level(struct netns_ipvs *ipvs) 95 { 96 struct sysinfo i; 97 int availmem; 98 int nomem; 99 int to_change = -1; 100 101 /* we only count free and buffered memory (in pages) */ 102 si_meminfo(&i); 103 availmem = i.freeram + i.bufferram; 104 /* however in linux 2.5 the i.bufferram is total page cache size, 105 we need adjust it */ 106 /* si_swapinfo(&i); */ 107 /* availmem = availmem - (i.totalswap - i.freeswap); */ 108 109 nomem = (availmem < ipvs->sysctl_amemthresh); 110 111 local_bh_disable(); 112 113 /* drop_entry */ 114 spin_lock(&ipvs->dropentry_lock); 115 switch (ipvs->sysctl_drop_entry) { 116 case 0: 117 atomic_set(&ipvs->dropentry, 0); 118 break; 119 case 1: 120 if (nomem) { 121 atomic_set(&ipvs->dropentry, 1); 122 ipvs->sysctl_drop_entry = 2; 123 } else { 124 atomic_set(&ipvs->dropentry, 0); 125 } 126 break; 127 case 2: 128 if (nomem) { 129 atomic_set(&ipvs->dropentry, 1); 130 } else { 131 atomic_set(&ipvs->dropentry, 0); 132 ipvs->sysctl_drop_entry = 1; 133 } 134 break; 135 case 3: 136 atomic_set(&ipvs->dropentry, 1); 137 break; 138 } 139 spin_unlock(&ipvs->dropentry_lock); 140 141 /* drop_packet */ 142 spin_lock(&ipvs->droppacket_lock); 143 switch (ipvs->sysctl_drop_packet) { 144 case 0: 145 ipvs->drop_rate = 0; 146 break; 147 case 1: 148 if (nomem) { 149 ipvs->drop_rate = ipvs->drop_counter 150 = ipvs->sysctl_amemthresh / 151 (ipvs->sysctl_amemthresh-availmem); 152 ipvs->sysctl_drop_packet = 2; 153 } else { 154 
ipvs->drop_rate = 0; 155 } 156 break; 157 case 2: 158 if (nomem) { 159 ipvs->drop_rate = ipvs->drop_counter 160 = ipvs->sysctl_amemthresh / 161 (ipvs->sysctl_amemthresh-availmem); 162 } else { 163 ipvs->drop_rate = 0; 164 ipvs->sysctl_drop_packet = 1; 165 } 166 break; 167 case 3: 168 ipvs->drop_rate = ipvs->sysctl_am_droprate; 169 break; 170 } 171 spin_unlock(&ipvs->droppacket_lock); 172 173 /* secure_tcp */ 174 spin_lock(&ipvs->securetcp_lock); 175 switch (ipvs->sysctl_secure_tcp) { 176 case 0: 177 if (ipvs->old_secure_tcp >= 2) 178 to_change = 0; 179 break; 180 case 1: 181 if (nomem) { 182 if (ipvs->old_secure_tcp < 2) 183 to_change = 1; 184 ipvs->sysctl_secure_tcp = 2; 185 } else { 186 if (ipvs->old_secure_tcp >= 2) 187 to_change = 0; 188 } 189 break; 190 case 2: 191 if (nomem) { 192 if (ipvs->old_secure_tcp < 2) 193 to_change = 1; 194 } else { 195 if (ipvs->old_secure_tcp >= 2) 196 to_change = 0; 197 ipvs->sysctl_secure_tcp = 1; 198 } 199 break; 200 case 3: 201 if (ipvs->old_secure_tcp < 2) 202 to_change = 1; 203 break; 204 } 205 ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; 206 if (to_change >= 0) 207 ip_vs_protocol_timeout_change(ipvs, 208 ipvs->sysctl_secure_tcp > 1); 209 spin_unlock(&ipvs->securetcp_lock); 210 211 local_bh_enable(); 212 } 213 214 /* Handler for delayed work for expiring no 215 * destination connections 216 */ 217 static void expire_nodest_conn_handler(struct work_struct *work) 218 { 219 struct netns_ipvs *ipvs; 220 221 ipvs = container_of(work, struct netns_ipvs, 222 expire_nodest_conn_work.work); 223 ip_vs_expire_nodest_conn_flush(ipvs); 224 } 225 226 /* 227 * Timer for checking the defense 228 */ 229 #define DEFENSE_TIMER_PERIOD 1*HZ 230 231 static void defense_work_handler(struct work_struct *work) 232 { 233 struct netns_ipvs *ipvs = 234 container_of(work, struct netns_ipvs, defense_work.work); 235 236 update_defense_level(ipvs); 237 if (atomic_read(&ipvs->dropentry)) 238 ip_vs_random_dropentry(ipvs); 239 
queue_delayed_work(system_long_wq, &ipvs->defense_work, 240 DEFENSE_TIMER_PERIOD); 241 } 242 #endif 243 244 int 245 ip_vs_use_count_inc(void) 246 { 247 return try_module_get(THIS_MODULE); 248 } 249 250 void 251 ip_vs_use_count_dec(void) 252 { 253 module_put(THIS_MODULE); 254 } 255 256 257 /* 258 * Hash table: for virtual service lookups 259 */ 260 #define IP_VS_SVC_TAB_BITS 8 261 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 262 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 263 264 /* the service table hashed by <protocol, addr, port> */ 265 static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 266 /* the service table hashed by fwmark */ 267 static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 268 269 270 /* 271 * Returns hash value for virtual service 272 */ 273 static inline unsigned int 274 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, 275 const union nf_inet_addr *addr, __be16 port) 276 { 277 unsigned int porth = ntohs(port); 278 __be32 addr_fold = addr->ip; 279 __u32 ahash; 280 281 #ifdef CONFIG_IP_VS_IPV6 282 if (af == AF_INET6) 283 addr_fold = addr->ip6[0]^addr->ip6[1]^ 284 addr->ip6[2]^addr->ip6[3]; 285 #endif 286 ahash = ntohl(addr_fold); 287 ahash ^= ((size_t) ipvs >> 8); 288 289 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & 290 IP_VS_SVC_TAB_MASK; 291 } 292 293 /* 294 * Returns hash value of fwmark for virtual service lookup 295 */ 296 static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) 297 { 298 return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 299 } 300 301 /* 302 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 303 * or in the ip_vs_svc_fwm_table by fwmark. 304 * Should be called with locked tables. 
305 */ 306 static int ip_vs_svc_hash(struct ip_vs_service *svc) 307 { 308 unsigned int hash; 309 310 if (svc->flags & IP_VS_SVC_F_HASHED) { 311 pr_err("%s(): request for already hashed, called from %pS\n", 312 __func__, __builtin_return_address(0)); 313 return 0; 314 } 315 316 if (svc->fwmark == 0) { 317 /* 318 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 319 */ 320 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, 321 &svc->addr, svc->port); 322 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); 323 } else { 324 /* 325 * Hash it by fwmark in svc_fwm_table 326 */ 327 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); 328 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 329 } 330 331 svc->flags |= IP_VS_SVC_F_HASHED; 332 /* increase its refcnt because it is referenced by the svc table */ 333 atomic_inc(&svc->refcnt); 334 return 1; 335 } 336 337 338 /* 339 * Unhashes a service from svc_table / svc_fwm_table. 340 * Should be called with locked tables. 341 */ 342 static int ip_vs_svc_unhash(struct ip_vs_service *svc) 343 { 344 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 345 pr_err("%s(): request for unhash flagged, called from %pS\n", 346 __func__, __builtin_return_address(0)); 347 return 0; 348 } 349 350 if (svc->fwmark == 0) { 351 /* Remove it from the svc_table table */ 352 hlist_del_rcu(&svc->s_list); 353 } else { 354 /* Remove it from the svc_fwm_table table */ 355 hlist_del_rcu(&svc->f_list); 356 } 357 358 svc->flags &= ~IP_VS_SVC_F_HASHED; 359 atomic_dec(&svc->refcnt); 360 return 1; 361 } 362 363 364 /* 365 * Get service by {netns, proto,addr,port} in the service table. 
366 */ 367 static inline struct ip_vs_service * 368 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, 369 const union nf_inet_addr *vaddr, __be16 vport) 370 { 371 unsigned int hash; 372 struct ip_vs_service *svc; 373 374 /* Check for "full" addressed entries */ 375 hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); 376 377 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { 378 if ((svc->af == af) 379 && ip_vs_addr_equal(af, &svc->addr, vaddr) 380 && (svc->port == vport) 381 && (svc->protocol == protocol) 382 && (svc->ipvs == ipvs)) { 383 /* HIT */ 384 return svc; 385 } 386 } 387 388 return NULL; 389 } 390 391 392 /* 393 * Get service by {fwmark} in the service table. 394 */ 395 static inline struct ip_vs_service * 396 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) 397 { 398 unsigned int hash; 399 struct ip_vs_service *svc; 400 401 /* Check for fwmark addressed entries */ 402 hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); 403 404 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { 405 if (svc->fwmark == fwmark && svc->af == af 406 && (svc->ipvs == ipvs)) { 407 /* HIT */ 408 return svc; 409 } 410 } 411 412 return NULL; 413 } 414 415 /* Find service, called under RCU lock */ 416 struct ip_vs_service * 417 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, 418 const union nf_inet_addr *vaddr, __be16 vport) 419 { 420 struct ip_vs_service *svc; 421 422 /* 423 * Check the table hashed by fwmark first 424 */ 425 if (fwmark) { 426 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); 427 if (svc) 428 goto out; 429 } 430 431 /* 432 * Check the table hashed by <protocol,addr,port> 433 * for "full" addressed entries 434 */ 435 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); 436 437 if (!svc && protocol == IPPROTO_TCP && 438 atomic_read(&ipvs->ftpsvc_counter) && 439 (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) { 440 /* 441 * Check 
if ftp service entry exists, the packet 442 * might belong to FTP data connections. 443 */ 444 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); 445 } 446 447 if (svc == NULL 448 && atomic_read(&ipvs->nullsvc_counter)) { 449 /* 450 * Check if the catch-all port (port zero) exists 451 */ 452 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); 453 } 454 455 out: 456 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 457 fwmark, ip_vs_proto_name(protocol), 458 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 459 svc ? "hit" : "not hit"); 460 461 return svc; 462 } 463 464 465 static inline void 466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 467 { 468 atomic_inc(&svc->refcnt); 469 rcu_assign_pointer(dest->svc, svc); 470 } 471 472 static void ip_vs_service_free(struct ip_vs_service *svc) 473 { 474 free_percpu(svc->stats.cpustats); 475 kfree(svc); 476 } 477 478 static void ip_vs_service_rcu_free(struct rcu_head *head) 479 { 480 struct ip_vs_service *svc; 481 482 svc = container_of(head, struct ip_vs_service, rcu_head); 483 ip_vs_service_free(svc); 484 } 485 486 static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) 487 { 488 if (atomic_dec_and_test(&svc->refcnt)) { 489 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 490 svc->fwmark, 491 IP_VS_DBG_ADDR(svc->af, &svc->addr), 492 ntohs(svc->port)); 493 if (do_delay) 494 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); 495 else 496 ip_vs_service_free(svc); 497 } 498 } 499 500 501 /* 502 * Returns hash value for real service 503 */ 504 static inline unsigned int ip_vs_rs_hashkey(int af, 505 const union nf_inet_addr *addr, 506 __be16 port) 507 { 508 unsigned int porth = ntohs(port); 509 __be32 addr_fold = addr->ip; 510 511 #ifdef CONFIG_IP_VS_IPV6 512 if (af == AF_INET6) 513 addr_fold = addr->ip6[0]^addr->ip6[1]^ 514 addr->ip6[2]^addr->ip6[3]; 515 #endif 516 517 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) 518 & IP_VS_RTAB_MASK; 519 } 520 521 /* Hash 
ip_vs_dest in rs_table by <proto,addr,port>. */ 522 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 523 { 524 unsigned int hash; 525 __be16 port; 526 527 if (dest->in_rs_table) 528 return; 529 530 switch (IP_VS_DFWD_METHOD(dest)) { 531 case IP_VS_CONN_F_MASQ: 532 port = dest->port; 533 break; 534 case IP_VS_CONN_F_TUNNEL: 535 switch (dest->tun_type) { 536 case IP_VS_CONN_F_TUNNEL_TYPE_GUE: 537 port = dest->tun_port; 538 break; 539 case IP_VS_CONN_F_TUNNEL_TYPE_IPIP: 540 case IP_VS_CONN_F_TUNNEL_TYPE_GRE: 541 port = 0; 542 break; 543 default: 544 return; 545 } 546 break; 547 default: 548 return; 549 } 550 551 /* 552 * Hash by proto,addr,port, 553 * which are the parameters of the real service. 554 */ 555 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port); 556 557 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); 558 dest->in_rs_table = 1; 559 } 560 561 /* Unhash ip_vs_dest from rs_table. */ 562 static void ip_vs_rs_unhash(struct ip_vs_dest *dest) 563 { 564 /* 565 * Remove it from the rs_table table. 566 */ 567 if (dest->in_rs_table) { 568 hlist_del_rcu(&dest->d_list); 569 dest->in_rs_table = 0; 570 } 571 } 572 573 /* Check if real service by <proto,addr,port> is present */ 574 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, 575 const union nf_inet_addr *daddr, __be16 dport) 576 { 577 unsigned int hash; 578 struct ip_vs_dest *dest; 579 580 /* Check for "full" addressed entries */ 581 hash = ip_vs_rs_hashkey(af, daddr, dport); 582 583 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 584 if (dest->port == dport && 585 dest->af == af && 586 ip_vs_addr_equal(af, &dest->addr, daddr) && 587 (dest->protocol == protocol || dest->vfwmark) && 588 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 589 /* HIT */ 590 return true; 591 } 592 } 593 594 return false; 595 } 596 597 /* Find real service record by <proto,addr,port>. 
598 * In case of multiple records with the same <proto,addr,port>, only 599 * the first found record is returned. 600 * 601 * To be called under RCU lock. 602 */ 603 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, 604 __u16 protocol, 605 const union nf_inet_addr *daddr, 606 __be16 dport) 607 { 608 unsigned int hash; 609 struct ip_vs_dest *dest; 610 611 /* Check for "full" addressed entries */ 612 hash = ip_vs_rs_hashkey(af, daddr, dport); 613 614 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 615 if (dest->port == dport && 616 dest->af == af && 617 ip_vs_addr_equal(af, &dest->addr, daddr) && 618 (dest->protocol == protocol || dest->vfwmark) && 619 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 620 /* HIT */ 621 return dest; 622 } 623 } 624 625 return NULL; 626 } 627 628 /* Find real service record by <af,addr,tun_port>. 629 * In case of multiple records with the same <af,addr,tun_port>, only 630 * the first found record is returned. 631 * 632 * To be called under RCU lock. 633 */ 634 struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af, 635 const union nf_inet_addr *daddr, 636 __be16 tun_port) 637 { 638 struct ip_vs_dest *dest; 639 unsigned int hash; 640 641 /* Check for "full" addressed entries */ 642 hash = ip_vs_rs_hashkey(af, daddr, tun_port); 643 644 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 645 if (dest->tun_port == tun_port && 646 dest->af == af && 647 ip_vs_addr_equal(af, &dest->addr, daddr) && 648 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) { 649 /* HIT */ 650 return dest; 651 } 652 } 653 654 return NULL; 655 } 656 657 /* Lookup destination by {addr,port} in the given service 658 * Called under RCU lock. 
659 */ 660 static struct ip_vs_dest * 661 ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, 662 const union nf_inet_addr *daddr, __be16 dport) 663 { 664 struct ip_vs_dest *dest; 665 666 /* 667 * Find the destination for the given service 668 */ 669 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 670 if ((dest->af == dest_af) && 671 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 672 (dest->port == dport)) { 673 /* HIT */ 674 return dest; 675 } 676 } 677 678 return NULL; 679 } 680 681 /* 682 * Find destination by {daddr,dport,vaddr,protocol} 683 * Created to be used in ip_vs_process_message() in 684 * the backup synchronization daemon. It finds the 685 * destination to be bound to the received connection 686 * on the backup. 687 * Called under RCU lock, no refcnt is returned. 688 */ 689 struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af, 690 const union nf_inet_addr *daddr, 691 __be16 dport, 692 const union nf_inet_addr *vaddr, 693 __be16 vport, __u16 protocol, __u32 fwmark, 694 __u32 flags) 695 { 696 struct ip_vs_dest *dest; 697 struct ip_vs_service *svc; 698 __be16 port = dport; 699 700 svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport); 701 if (!svc) 702 return NULL; 703 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 704 port = 0; 705 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port); 706 if (!dest) 707 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport); 708 return dest; 709 } 710 711 void ip_vs_dest_dst_rcu_free(struct rcu_head *head) 712 { 713 struct ip_vs_dest_dst *dest_dst = container_of(head, 714 struct ip_vs_dest_dst, 715 rcu_head); 716 717 dst_release(dest_dst->dst_cache); 718 kfree(dest_dst); 719 } 720 721 /* Release dest_dst and dst_cache for dest in user context */ 722 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) 723 { 724 struct ip_vs_dest_dst *old; 725 726 old = rcu_dereference_protected(dest->dest_dst, 1); 727 if (old) { 728 
RCU_INIT_POINTER(dest->dest_dst, NULL); 729 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 730 } 731 } 732 733 /* 734 * Lookup dest by {svc,addr,port} in the destination trash. 735 * The destination trash is used to hold the destinations that are removed 736 * from the service table but are still referenced by some conn entries. 737 * The reason to add the destination trash is when the dest is temporary 738 * down (either by administrator or by monitor program), the dest can be 739 * picked back from the trash, the remaining connections to the dest can 740 * continue, and the counting information of the dest is also useful for 741 * scheduling. 742 */ 743 static struct ip_vs_dest * 744 ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, 745 const union nf_inet_addr *daddr, __be16 dport) 746 { 747 struct ip_vs_dest *dest; 748 struct netns_ipvs *ipvs = svc->ipvs; 749 750 /* 751 * Find the destination in trash 752 */ 753 spin_lock_bh(&ipvs->dest_trash_lock); 754 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 755 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 756 "dest->refcnt=%d\n", 757 dest->vfwmark, 758 IP_VS_DBG_ADDR(dest->af, &dest->addr), 759 ntohs(dest->port), 760 refcount_read(&dest->refcnt)); 761 if (dest->af == dest_af && 762 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 763 dest->port == dport && 764 dest->vfwmark == svc->fwmark && 765 dest->protocol == svc->protocol && 766 (svc->fwmark || 767 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 768 dest->vport == svc->port))) { 769 /* HIT */ 770 list_del(&dest->t_list); 771 goto out; 772 } 773 } 774 775 dest = NULL; 776 777 out: 778 spin_unlock_bh(&ipvs->dest_trash_lock); 779 780 return dest; 781 } 782 783 static void ip_vs_dest_free(struct ip_vs_dest *dest) 784 { 785 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); 786 787 __ip_vs_dst_cache_reset(dest); 788 __ip_vs_svc_put(svc, false); 789 free_percpu(dest->stats.cpustats); 790 
ip_vs_dest_put_and_free(dest); 791 } 792 793 /* 794 * Clean up all the destinations in the trash 795 * Called by the ip_vs_control_cleanup() 796 * 797 * When the ip_vs_control_clearup is activated by ipvs module exit, 798 * the service tables must have been flushed and all the connections 799 * are expired, and the refcnt of each destination in the trash must 800 * be 1, so we simply release them here. 801 */ 802 static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) 803 { 804 struct ip_vs_dest *dest, *nxt; 805 806 del_timer_sync(&ipvs->dest_trash_timer); 807 /* No need to use dest_trash_lock */ 808 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { 809 list_del(&dest->t_list); 810 ip_vs_dest_free(dest); 811 } 812 } 813 814 static void 815 ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) 816 { 817 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c 818 819 spin_lock_bh(&src->lock); 820 821 IP_VS_SHOW_STATS_COUNTER(conns); 822 IP_VS_SHOW_STATS_COUNTER(inpkts); 823 IP_VS_SHOW_STATS_COUNTER(outpkts); 824 IP_VS_SHOW_STATS_COUNTER(inbytes); 825 IP_VS_SHOW_STATS_COUNTER(outbytes); 826 827 ip_vs_read_estimator(dst, src); 828 829 spin_unlock_bh(&src->lock); 830 } 831 832 static void 833 ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) 834 { 835 dst->conns = (u32)src->conns; 836 dst->inpkts = (u32)src->inpkts; 837 dst->outpkts = (u32)src->outpkts; 838 dst->inbytes = src->inbytes; 839 dst->outbytes = src->outbytes; 840 dst->cps = (u32)src->cps; 841 dst->inpps = (u32)src->inpps; 842 dst->outpps = (u32)src->outpps; 843 dst->inbps = (u32)src->inbps; 844 dst->outbps = (u32)src->outbps; 845 } 846 847 static void 848 ip_vs_zero_stats(struct ip_vs_stats *stats) 849 { 850 spin_lock_bh(&stats->lock); 851 852 /* get current counters as zero point, rates are zeroed */ 853 854 #define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c 855 856 IP_VS_ZERO_STATS_COUNTER(conns); 857 
IP_VS_ZERO_STATS_COUNTER(inpkts); 858 IP_VS_ZERO_STATS_COUNTER(outpkts); 859 IP_VS_ZERO_STATS_COUNTER(inbytes); 860 IP_VS_ZERO_STATS_COUNTER(outbytes); 861 862 ip_vs_zero_estimator(stats); 863 864 spin_unlock_bh(&stats->lock); 865 } 866 867 /* 868 * Update a destination in the given service 869 */ 870 static void 871 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 872 struct ip_vs_dest_user_kern *udest, int add) 873 { 874 struct netns_ipvs *ipvs = svc->ipvs; 875 struct ip_vs_service *old_svc; 876 struct ip_vs_scheduler *sched; 877 int conn_flags; 878 879 /* We cannot modify an address and change the address family */ 880 BUG_ON(!add && udest->af != dest->af); 881 882 if (add && udest->af != svc->af) 883 ipvs->mixed_address_family_dests++; 884 885 /* keep the last_weight with latest non-0 weight */ 886 if (add || udest->weight != 0) 887 atomic_set(&dest->last_weight, udest->weight); 888 889 /* set the weight and the flags */ 890 atomic_set(&dest->weight, udest->weight); 891 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 892 conn_flags |= IP_VS_CONN_F_INACTIVE; 893 894 /* Need to rehash? */ 895 if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) != 896 IP_VS_DFWD_METHOD(dest) || 897 udest->tun_type != dest->tun_type || 898 udest->tun_port != dest->tun_port) 899 ip_vs_rs_unhash(dest); 900 901 /* set the tunnel info */ 902 dest->tun_type = udest->tun_type; 903 dest->tun_port = udest->tun_port; 904 dest->tun_flags = udest->tun_flags; 905 906 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 907 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 908 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 909 } else { 910 /* FTP-NAT requires conntrack for mangling */ 911 if (svc->port == FTPPORT) 912 ip_vs_register_conntrack(svc); 913 } 914 atomic_set(&dest->conn_flags, conn_flags); 915 /* Put the real service in rs_table if not present. 
*/ 916 ip_vs_rs_hash(ipvs, dest); 917 918 /* bind the service */ 919 old_svc = rcu_dereference_protected(dest->svc, 1); 920 if (!old_svc) { 921 __ip_vs_bind_svc(dest, svc); 922 } else { 923 if (old_svc != svc) { 924 ip_vs_zero_stats(&dest->stats); 925 __ip_vs_bind_svc(dest, svc); 926 __ip_vs_svc_put(old_svc, true); 927 } 928 } 929 930 /* set the dest status flags */ 931 dest->flags |= IP_VS_DEST_F_AVAILABLE; 932 933 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) 934 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 935 dest->u_threshold = udest->u_threshold; 936 dest->l_threshold = udest->l_threshold; 937 938 dest->af = udest->af; 939 940 spin_lock_bh(&dest->dst_lock); 941 __ip_vs_dst_cache_reset(dest); 942 spin_unlock_bh(&dest->dst_lock); 943 944 if (add) { 945 ip_vs_start_estimator(svc->ipvs, &dest->stats); 946 list_add_rcu(&dest->n_list, &svc->destinations); 947 svc->num_dests++; 948 sched = rcu_dereference_protected(svc->scheduler, 1); 949 if (sched && sched->add_dest) 950 sched->add_dest(svc, dest); 951 } else { 952 sched = rcu_dereference_protected(svc->scheduler, 1); 953 if (sched && sched->upd_dest) 954 sched->upd_dest(svc, dest); 955 } 956 } 957 958 959 /* 960 * Create a destination for the given service 961 */ 962 static int 963 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 964 { 965 struct ip_vs_dest *dest; 966 unsigned int atype, i; 967 968 EnterFunction(2); 969 970 #ifdef CONFIG_IP_VS_IPV6 971 if (udest->af == AF_INET6) { 972 int ret; 973 974 atype = ipv6_addr_type(&udest->addr.in6); 975 if ((!(atype & IPV6_ADDR_UNICAST) || 976 atype & IPV6_ADDR_LINKLOCAL) && 977 !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) 978 return -EINVAL; 979 980 ret = nf_defrag_ipv6_enable(svc->ipvs->net); 981 if (ret) 982 return ret; 983 } else 984 #endif 985 { 986 atype = inet_addr_type(svc->ipvs->net, udest->addr.ip); 987 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 988 return -EINVAL; 989 } 990 991 dest = 
kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); 992 if (dest == NULL) 993 return -ENOMEM; 994 995 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 996 if (!dest->stats.cpustats) 997 goto err_alloc; 998 999 for_each_possible_cpu(i) { 1000 struct ip_vs_cpu_stats *ip_vs_dest_stats; 1001 ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); 1002 u64_stats_init(&ip_vs_dest_stats->syncp); 1003 } 1004 1005 dest->af = udest->af; 1006 dest->protocol = svc->protocol; 1007 dest->vaddr = svc->addr; 1008 dest->vport = svc->port; 1009 dest->vfwmark = svc->fwmark; 1010 ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr); 1011 dest->port = udest->port; 1012 1013 atomic_set(&dest->activeconns, 0); 1014 atomic_set(&dest->inactconns, 0); 1015 atomic_set(&dest->persistconns, 0); 1016 refcount_set(&dest->refcnt, 1); 1017 1018 INIT_HLIST_NODE(&dest->d_list); 1019 spin_lock_init(&dest->dst_lock); 1020 spin_lock_init(&dest->stats.lock); 1021 __ip_vs_update_dest(svc, dest, udest, 1); 1022 1023 LeaveFunction(2); 1024 return 0; 1025 1026 err_alloc: 1027 kfree(dest); 1028 return -ENOMEM; 1029 } 1030 1031 1032 /* 1033 * Add a destination into an existing service 1034 */ 1035 static int 1036 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1037 { 1038 struct ip_vs_dest *dest; 1039 union nf_inet_addr daddr; 1040 __be16 dport = udest->port; 1041 int ret; 1042 1043 EnterFunction(2); 1044 1045 if (udest->weight < 0) { 1046 pr_err("%s(): server weight less than zero\n", __func__); 1047 return -ERANGE; 1048 } 1049 1050 if (udest->l_threshold > udest->u_threshold) { 1051 pr_err("%s(): lower threshold is higher than upper threshold\n", 1052 __func__); 1053 return -ERANGE; 1054 } 1055 1056 if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { 1057 if (udest->tun_port == 0) { 1058 pr_err("%s(): tunnel port is zero\n", __func__); 1059 return -EINVAL; 1060 } 1061 } 1062 1063 ip_vs_addr_copy(udest->af, &daddr, &udest->addr); 1064 1065 /* We use function that 
requires RCU lock */ 1066 rcu_read_lock(); 1067 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); 1068 rcu_read_unlock(); 1069 1070 if (dest != NULL) { 1071 IP_VS_DBG(1, "%s(): dest already exists\n", __func__); 1072 return -EEXIST; 1073 } 1074 1075 /* 1076 * Check if the dest already exists in the trash and 1077 * is from the same service 1078 */ 1079 dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport); 1080 1081 if (dest != NULL) { 1082 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " 1083 "dest->refcnt=%d, service %u/%s:%u\n", 1084 IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport), 1085 refcount_read(&dest->refcnt), 1086 dest->vfwmark, 1087 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 1088 ntohs(dest->vport)); 1089 1090 __ip_vs_update_dest(svc, dest, udest, 1); 1091 ret = 0; 1092 } else { 1093 /* 1094 * Allocate and initialize the dest structure 1095 */ 1096 ret = ip_vs_new_dest(svc, udest); 1097 } 1098 LeaveFunction(2); 1099 1100 return ret; 1101 } 1102 1103 1104 /* 1105 * Edit a destination in the given service 1106 */ 1107 static int 1108 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1109 { 1110 struct ip_vs_dest *dest; 1111 union nf_inet_addr daddr; 1112 __be16 dport = udest->port; 1113 1114 EnterFunction(2); 1115 1116 if (udest->weight < 0) { 1117 pr_err("%s(): server weight less than zero\n", __func__); 1118 return -ERANGE; 1119 } 1120 1121 if (udest->l_threshold > udest->u_threshold) { 1122 pr_err("%s(): lower threshold is higher than upper threshold\n", 1123 __func__); 1124 return -ERANGE; 1125 } 1126 1127 if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { 1128 if (udest->tun_port == 0) { 1129 pr_err("%s(): tunnel port is zero\n", __func__); 1130 return -EINVAL; 1131 } 1132 } 1133 1134 ip_vs_addr_copy(udest->af, &daddr, &udest->addr); 1135 1136 /* We use function that requires RCU lock */ 1137 rcu_read_lock(); 1138 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); 1139 rcu_read_unlock(); 1140 1141 if 
(dest == NULL) { 1142 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); 1143 return -ENOENT; 1144 } 1145 1146 __ip_vs_update_dest(svc, dest, udest, 0); 1147 LeaveFunction(2); 1148 1149 return 0; 1150 } 1151 1152 /* 1153 * Delete a destination (must be already unlinked from the service) 1154 */ 1155 static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, 1156 bool cleanup) 1157 { 1158 ip_vs_stop_estimator(ipvs, &dest->stats); 1159 1160 /* 1161 * Remove it from the d-linked list with the real services. 1162 */ 1163 ip_vs_rs_unhash(dest); 1164 1165 spin_lock_bh(&ipvs->dest_trash_lock); 1166 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", 1167 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), 1168 refcount_read(&dest->refcnt)); 1169 if (list_empty(&ipvs->dest_trash) && !cleanup) 1170 mod_timer(&ipvs->dest_trash_timer, 1171 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); 1172 /* dest lives in trash with reference */ 1173 list_add(&dest->t_list, &ipvs->dest_trash); 1174 dest->idle_start = 0; 1175 spin_unlock_bh(&ipvs->dest_trash_lock); 1176 1177 /* Queue up delayed work to expire all no destination connections. 1178 * No-op when CONFIG_SYSCTL is disabled. 1179 */ 1180 if (!cleanup) 1181 ip_vs_enqueue_expire_nodest_conns(ipvs); 1182 } 1183 1184 1185 /* 1186 * Unlink a destination from the given service 1187 */ 1188 static void __ip_vs_unlink_dest(struct ip_vs_service *svc, 1189 struct ip_vs_dest *dest, 1190 int svcupd) 1191 { 1192 dest->flags &= ~IP_VS_DEST_F_AVAILABLE; 1193 1194 /* 1195 * Remove it from the d-linked destination list. 
1196 */ 1197 list_del_rcu(&dest->n_list); 1198 svc->num_dests--; 1199 1200 if (dest->af != svc->af) 1201 svc->ipvs->mixed_address_family_dests--; 1202 1203 if (svcupd) { 1204 struct ip_vs_scheduler *sched; 1205 1206 sched = rcu_dereference_protected(svc->scheduler, 1); 1207 if (sched && sched->del_dest) 1208 sched->del_dest(svc, dest); 1209 } 1210 } 1211 1212 1213 /* 1214 * Delete a destination server in the given service 1215 */ 1216 static int 1217 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1218 { 1219 struct ip_vs_dest *dest; 1220 __be16 dport = udest->port; 1221 1222 EnterFunction(2); 1223 1224 /* We use function that requires RCU lock */ 1225 rcu_read_lock(); 1226 dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport); 1227 rcu_read_unlock(); 1228 1229 if (dest == NULL) { 1230 IP_VS_DBG(1, "%s(): destination not found!\n", __func__); 1231 return -ENOENT; 1232 } 1233 1234 /* 1235 * Unlink dest from the service 1236 */ 1237 __ip_vs_unlink_dest(svc, dest, 1); 1238 1239 /* 1240 * Delete the destination 1241 */ 1242 __ip_vs_del_dest(svc->ipvs, dest, false); 1243 1244 LeaveFunction(2); 1245 1246 return 0; 1247 } 1248 1249 static void ip_vs_dest_trash_expire(struct timer_list *t) 1250 { 1251 struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer); 1252 struct ip_vs_dest *dest, *next; 1253 unsigned long now = jiffies; 1254 1255 spin_lock(&ipvs->dest_trash_lock); 1256 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { 1257 if (refcount_read(&dest->refcnt) > 1) 1258 continue; 1259 if (dest->idle_start) { 1260 if (time_before(now, dest->idle_start + 1261 IP_VS_DEST_TRASH_PERIOD)) 1262 continue; 1263 } else { 1264 dest->idle_start = max(1UL, now); 1265 continue; 1266 } 1267 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", 1268 dest->vfwmark, 1269 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1270 ntohs(dest->port)); 1271 list_del(&dest->t_list); 1272 ip_vs_dest_free(dest); 1273 } 1274 if 
(!list_empty(&ipvs->dest_trash)) 1275 mod_timer(&ipvs->dest_trash_timer, 1276 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); 1277 spin_unlock(&ipvs->dest_trash_lock); 1278 } 1279 1280 /* 1281 * Add a service into the service hash table 1282 */ 1283 static int 1284 ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, 1285 struct ip_vs_service **svc_p) 1286 { 1287 int ret = 0, i; 1288 struct ip_vs_scheduler *sched = NULL; 1289 struct ip_vs_pe *pe = NULL; 1290 struct ip_vs_service *svc = NULL; 1291 int ret_hooks = -1; 1292 1293 /* increase the module use count */ 1294 if (!ip_vs_use_count_inc()) 1295 return -ENOPROTOOPT; 1296 1297 /* Lookup the scheduler by 'u->sched_name' */ 1298 if (strcmp(u->sched_name, "none")) { 1299 sched = ip_vs_scheduler_get(u->sched_name); 1300 if (!sched) { 1301 pr_info("Scheduler module ip_vs_%s not found\n", 1302 u->sched_name); 1303 ret = -ENOENT; 1304 goto out_err; 1305 } 1306 } 1307 1308 if (u->pe_name && *u->pe_name) { 1309 pe = ip_vs_pe_getbyname(u->pe_name); 1310 if (pe == NULL) { 1311 pr_info("persistence engine module ip_vs_pe_%s " 1312 "not found\n", u->pe_name); 1313 ret = -ENOENT; 1314 goto out_err; 1315 } 1316 } 1317 1318 #ifdef CONFIG_IP_VS_IPV6 1319 if (u->af == AF_INET6) { 1320 __u32 plen = (__force __u32) u->netmask; 1321 1322 if (plen < 1 || plen > 128) { 1323 ret = -EINVAL; 1324 goto out_err; 1325 } 1326 1327 ret = nf_defrag_ipv6_enable(ipvs->net); 1328 if (ret) 1329 goto out_err; 1330 } 1331 #endif 1332 1333 if ((u->af == AF_INET && !ipvs->num_services) || 1334 (u->af == AF_INET6 && !ipvs->num_services6)) { 1335 ret = ip_vs_register_hooks(ipvs, u->af); 1336 if (ret < 0) 1337 goto out_err; 1338 ret_hooks = ret; 1339 } 1340 1341 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL); 1342 if (svc == NULL) { 1343 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1344 ret = -ENOMEM; 1345 goto out_err; 1346 } 1347 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 1348 if (!svc->stats.cpustats) { 
1349 ret = -ENOMEM; 1350 goto out_err; 1351 } 1352 1353 for_each_possible_cpu(i) { 1354 struct ip_vs_cpu_stats *ip_vs_stats; 1355 ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); 1356 u64_stats_init(&ip_vs_stats->syncp); 1357 } 1358 1359 1360 /* I'm the first user of the service */ 1361 atomic_set(&svc->refcnt, 0); 1362 1363 svc->af = u->af; 1364 svc->protocol = u->protocol; 1365 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); 1366 svc->port = u->port; 1367 svc->fwmark = u->fwmark; 1368 svc->flags = u->flags & ~IP_VS_SVC_F_HASHED; 1369 svc->timeout = u->timeout * HZ; 1370 svc->netmask = u->netmask; 1371 svc->ipvs = ipvs; 1372 1373 INIT_LIST_HEAD(&svc->destinations); 1374 spin_lock_init(&svc->sched_lock); 1375 spin_lock_init(&svc->stats.lock); 1376 1377 /* Bind the scheduler */ 1378 if (sched) { 1379 ret = ip_vs_bind_scheduler(svc, sched); 1380 if (ret) 1381 goto out_err; 1382 sched = NULL; 1383 } 1384 1385 /* Bind the ct retriever */ 1386 RCU_INIT_POINTER(svc->pe, pe); 1387 pe = NULL; 1388 1389 /* Update the virtual service counters */ 1390 if (svc->port == FTPPORT) 1391 atomic_inc(&ipvs->ftpsvc_counter); 1392 else if (svc->port == 0) 1393 atomic_inc(&ipvs->nullsvc_counter); 1394 if (svc->pe && svc->pe->conn_out) 1395 atomic_inc(&ipvs->conn_out_counter); 1396 1397 ip_vs_start_estimator(ipvs, &svc->stats); 1398 1399 /* Count only IPv4 services for old get/setsockopt interface */ 1400 if (svc->af == AF_INET) 1401 ipvs->num_services++; 1402 else if (svc->af == AF_INET6) 1403 ipvs->num_services6++; 1404 1405 /* Hash the service into the service table */ 1406 ip_vs_svc_hash(svc); 1407 1408 *svc_p = svc; 1409 /* Now there is a service - full throttle */ 1410 ipvs->enable = 1; 1411 return 0; 1412 1413 1414 out_err: 1415 if (ret_hooks >= 0) 1416 ip_vs_unregister_hooks(ipvs, u->af); 1417 if (svc != NULL) { 1418 ip_vs_unbind_scheduler(svc, sched); 1419 ip_vs_service_free(svc); 1420 } 1421 ip_vs_scheduler_put(sched); 1422 ip_vs_pe_put(pe); 1423 1424 /* decrease the module 
use count */ 1425 ip_vs_use_count_dec(); 1426 1427 return ret; 1428 } 1429 1430 1431 /* 1432 * Edit a service and bind it with a new scheduler 1433 */ 1434 static int 1435 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1436 { 1437 struct ip_vs_scheduler *sched = NULL, *old_sched; 1438 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1439 int ret = 0; 1440 bool new_pe_conn_out, old_pe_conn_out; 1441 1442 /* 1443 * Lookup the scheduler, by 'u->sched_name' 1444 */ 1445 if (strcmp(u->sched_name, "none")) { 1446 sched = ip_vs_scheduler_get(u->sched_name); 1447 if (!sched) { 1448 pr_info("Scheduler module ip_vs_%s not found\n", 1449 u->sched_name); 1450 return -ENOENT; 1451 } 1452 } 1453 old_sched = sched; 1454 1455 if (u->pe_name && *u->pe_name) { 1456 pe = ip_vs_pe_getbyname(u->pe_name); 1457 if (pe == NULL) { 1458 pr_info("persistence engine module ip_vs_pe_%s " 1459 "not found\n", u->pe_name); 1460 ret = -ENOENT; 1461 goto out; 1462 } 1463 old_pe = pe; 1464 } 1465 1466 #ifdef CONFIG_IP_VS_IPV6 1467 if (u->af == AF_INET6) { 1468 __u32 plen = (__force __u32) u->netmask; 1469 1470 if (plen < 1 || plen > 128) { 1471 ret = -EINVAL; 1472 goto out; 1473 } 1474 } 1475 #endif 1476 1477 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1478 if (sched != old_sched) { 1479 if (old_sched) { 1480 ip_vs_unbind_scheduler(svc, old_sched); 1481 RCU_INIT_POINTER(svc->scheduler, NULL); 1482 /* Wait all svc->sched_data users */ 1483 synchronize_rcu(); 1484 } 1485 /* Bind the new scheduler */ 1486 if (sched) { 1487 ret = ip_vs_bind_scheduler(svc, sched); 1488 if (ret) { 1489 ip_vs_scheduler_put(sched); 1490 goto out; 1491 } 1492 } 1493 } 1494 1495 /* 1496 * Set the flags and timeout value 1497 */ 1498 svc->flags = u->flags | IP_VS_SVC_F_HASHED; 1499 svc->timeout = u->timeout * HZ; 1500 svc->netmask = u->netmask; 1501 1502 old_pe = rcu_dereference_protected(svc->pe, 1); 1503 if (pe != old_pe) { 1504 rcu_assign_pointer(svc->pe, pe); 1505 /* check for 
optional methods in new pe */ 1506 new_pe_conn_out = (pe && pe->conn_out) ? true : false; 1507 old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false; 1508 if (new_pe_conn_out && !old_pe_conn_out) 1509 atomic_inc(&svc->ipvs->conn_out_counter); 1510 if (old_pe_conn_out && !new_pe_conn_out) 1511 atomic_dec(&svc->ipvs->conn_out_counter); 1512 } 1513 1514 out: 1515 ip_vs_scheduler_put(old_sched); 1516 ip_vs_pe_put(old_pe); 1517 return ret; 1518 } 1519 1520 /* 1521 * Delete a service from the service list 1522 * - The service must be unlinked, unlocked and not referenced! 1523 * - We are called under _bh lock 1524 */ 1525 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) 1526 { 1527 struct ip_vs_dest *dest, *nxt; 1528 struct ip_vs_scheduler *old_sched; 1529 struct ip_vs_pe *old_pe; 1530 struct netns_ipvs *ipvs = svc->ipvs; 1531 1532 if (svc->af == AF_INET) { 1533 ipvs->num_services--; 1534 if (!ipvs->num_services) 1535 ip_vs_unregister_hooks(ipvs, svc->af); 1536 } else if (svc->af == AF_INET6) { 1537 ipvs->num_services6--; 1538 if (!ipvs->num_services6) 1539 ip_vs_unregister_hooks(ipvs, svc->af); 1540 } 1541 1542 ip_vs_stop_estimator(svc->ipvs, &svc->stats); 1543 1544 /* Unbind scheduler */ 1545 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1546 ip_vs_unbind_scheduler(svc, old_sched); 1547 ip_vs_scheduler_put(old_sched); 1548 1549 /* Unbind persistence engine, keep svc->pe */ 1550 old_pe = rcu_dereference_protected(svc->pe, 1); 1551 if (old_pe && old_pe->conn_out) 1552 atomic_dec(&ipvs->conn_out_counter); 1553 ip_vs_pe_put(old_pe); 1554 1555 /* 1556 * Unlink the whole destination list 1557 */ 1558 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1559 __ip_vs_unlink_dest(svc, dest, 0); 1560 __ip_vs_del_dest(svc->ipvs, dest, cleanup); 1561 } 1562 1563 /* 1564 * Update the virtual service counters 1565 */ 1566 if (svc->port == FTPPORT) 1567 atomic_dec(&ipvs->ftpsvc_counter); 1568 else if (svc->port == 0) 1569 
atomic_dec(&ipvs->nullsvc_counter); 1570 1571 /* 1572 * Free the service if nobody refers to it 1573 */ 1574 __ip_vs_svc_put(svc, true); 1575 1576 /* decrease the module use count */ 1577 ip_vs_use_count_dec(); 1578 } 1579 1580 /* 1581 * Unlink a service from list and try to delete it if its refcnt reached 0 1582 */ 1583 static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) 1584 { 1585 ip_vs_unregister_conntrack(svc); 1586 /* Hold svc to avoid double release from dest_trash */ 1587 atomic_inc(&svc->refcnt); 1588 /* 1589 * Unhash it from the service table 1590 */ 1591 ip_vs_svc_unhash(svc); 1592 1593 __ip_vs_del_service(svc, cleanup); 1594 } 1595 1596 /* 1597 * Delete a service from the service list 1598 */ 1599 static int ip_vs_del_service(struct ip_vs_service *svc) 1600 { 1601 if (svc == NULL) 1602 return -EEXIST; 1603 ip_vs_unlink_service(svc, false); 1604 1605 return 0; 1606 } 1607 1608 1609 /* 1610 * Flush all the virtual services 1611 */ 1612 static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) 1613 { 1614 int idx; 1615 struct ip_vs_service *svc; 1616 struct hlist_node *n; 1617 1618 /* 1619 * Flush the service table hashed by <netns,protocol,addr,port> 1620 */ 1621 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1622 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], 1623 s_list) { 1624 if (svc->ipvs == ipvs) 1625 ip_vs_unlink_service(svc, cleanup); 1626 } 1627 } 1628 1629 /* 1630 * Flush the service table hashed by fwmark 1631 */ 1632 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1633 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], 1634 f_list) { 1635 if (svc->ipvs == ipvs) 1636 ip_vs_unlink_service(svc, cleanup); 1637 } 1638 } 1639 1640 return 0; 1641 } 1642 1643 /* 1644 * Delete service by {netns} in the service table. 
1645 * Called by __ip_vs_batch_cleanup() 1646 */ 1647 void ip_vs_service_nets_cleanup(struct list_head *net_list) 1648 { 1649 struct netns_ipvs *ipvs; 1650 struct net *net; 1651 1652 EnterFunction(2); 1653 /* Check for "full" addressed entries */ 1654 mutex_lock(&__ip_vs_mutex); 1655 list_for_each_entry(net, net_list, exit_list) { 1656 ipvs = net_ipvs(net); 1657 ip_vs_flush(ipvs, true); 1658 } 1659 mutex_unlock(&__ip_vs_mutex); 1660 LeaveFunction(2); 1661 } 1662 1663 /* Put all references for device (dst_cache) */ 1664 static inline void 1665 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) 1666 { 1667 struct ip_vs_dest_dst *dest_dst; 1668 1669 spin_lock_bh(&dest->dst_lock); 1670 dest_dst = rcu_dereference_protected(dest->dest_dst, 1); 1671 if (dest_dst && dest_dst->dst_cache->dev == dev) { 1672 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", 1673 dev->name, 1674 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1675 ntohs(dest->port), 1676 refcount_read(&dest->refcnt)); 1677 __ip_vs_dst_cache_reset(dest); 1678 } 1679 spin_unlock_bh(&dest->dst_lock); 1680 1681 } 1682 /* Netdev event receiver 1683 * Currently only NETDEV_DOWN is handled to release refs to cached dsts 1684 */ 1685 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, 1686 void *ptr) 1687 { 1688 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1689 struct net *net = dev_net(dev); 1690 struct netns_ipvs *ipvs = net_ipvs(net); 1691 struct ip_vs_service *svc; 1692 struct ip_vs_dest *dest; 1693 unsigned int idx; 1694 1695 if (event != NETDEV_DOWN || !ipvs) 1696 return NOTIFY_DONE; 1697 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); 1698 EnterFunction(2); 1699 mutex_lock(&__ip_vs_mutex); 1700 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1701 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1702 if (svc->ipvs == ipvs) { 1703 list_for_each_entry(dest, &svc->destinations, 1704 n_list) { 1705 ip_vs_forget_dev(dest, dev); 1706 } 1707 } 
1708 } 1709 1710 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1711 if (svc->ipvs == ipvs) { 1712 list_for_each_entry(dest, &svc->destinations, 1713 n_list) { 1714 ip_vs_forget_dev(dest, dev); 1715 } 1716 } 1717 1718 } 1719 } 1720 1721 spin_lock_bh(&ipvs->dest_trash_lock); 1722 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 1723 ip_vs_forget_dev(dest, dev); 1724 } 1725 spin_unlock_bh(&ipvs->dest_trash_lock); 1726 mutex_unlock(&__ip_vs_mutex); 1727 LeaveFunction(2); 1728 return NOTIFY_DONE; 1729 } 1730 1731 /* 1732 * Zero counters in a service or all services 1733 */ 1734 static int ip_vs_zero_service(struct ip_vs_service *svc) 1735 { 1736 struct ip_vs_dest *dest; 1737 1738 list_for_each_entry(dest, &svc->destinations, n_list) { 1739 ip_vs_zero_stats(&dest->stats); 1740 } 1741 ip_vs_zero_stats(&svc->stats); 1742 return 0; 1743 } 1744 1745 static int ip_vs_zero_all(struct netns_ipvs *ipvs) 1746 { 1747 int idx; 1748 struct ip_vs_service *svc; 1749 1750 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1751 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1752 if (svc->ipvs == ipvs) 1753 ip_vs_zero_service(svc); 1754 } 1755 } 1756 1757 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1758 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1759 if (svc->ipvs == ipvs) 1760 ip_vs_zero_service(svc); 1761 } 1762 } 1763 1764 ip_vs_zero_stats(&ipvs->tot_stats); 1765 return 0; 1766 } 1767 1768 #ifdef CONFIG_SYSCTL 1769 1770 static int three = 3; 1771 1772 static int 1773 proc_do_defense_mode(struct ctl_table *table, int write, 1774 void *buffer, size_t *lenp, loff_t *ppos) 1775 { 1776 struct netns_ipvs *ipvs = table->extra2; 1777 int *valp = table->data; 1778 int val = *valp; 1779 int rc; 1780 1781 struct ctl_table tmp = { 1782 .data = &val, 1783 .maxlen = sizeof(int), 1784 .mode = table->mode, 1785 }; 1786 1787 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1788 if (write && (*valp != val)) { 1789 if (val < 0 || val > 3) { 
1790 rc = -EINVAL; 1791 } else { 1792 *valp = val; 1793 update_defense_level(ipvs); 1794 } 1795 } 1796 return rc; 1797 } 1798 1799 static int 1800 proc_do_sync_threshold(struct ctl_table *table, int write, 1801 void *buffer, size_t *lenp, loff_t *ppos) 1802 { 1803 int *valp = table->data; 1804 int val[2]; 1805 int rc; 1806 struct ctl_table tmp = { 1807 .data = &val, 1808 .maxlen = table->maxlen, 1809 .mode = table->mode, 1810 }; 1811 1812 memcpy(val, valp, sizeof(val)); 1813 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1814 if (write) { 1815 if (val[0] < 0 || val[1] < 0 || 1816 (val[0] >= val[1] && val[1])) 1817 rc = -EINVAL; 1818 else 1819 memcpy(valp, val, sizeof(val)); 1820 } 1821 return rc; 1822 } 1823 1824 static int 1825 proc_do_sync_ports(struct ctl_table *table, int write, 1826 void *buffer, size_t *lenp, loff_t *ppos) 1827 { 1828 int *valp = table->data; 1829 int val = *valp; 1830 int rc; 1831 1832 struct ctl_table tmp = { 1833 .data = &val, 1834 .maxlen = sizeof(int), 1835 .mode = table->mode, 1836 }; 1837 1838 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1839 if (write && (*valp != val)) { 1840 if (val < 1 || !is_power_of_2(val)) 1841 rc = -EINVAL; 1842 else 1843 *valp = val; 1844 } 1845 return rc; 1846 } 1847 1848 /* 1849 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1850 * Do not change order or insert new entries without 1851 * align with netns init in ip_vs_control_net_init() 1852 */ 1853 1854 static struct ctl_table vs_vars[] = { 1855 { 1856 .procname = "amemthresh", 1857 .maxlen = sizeof(int), 1858 .mode = 0644, 1859 .proc_handler = proc_dointvec, 1860 }, 1861 { 1862 .procname = "am_droprate", 1863 .maxlen = sizeof(int), 1864 .mode = 0644, 1865 .proc_handler = proc_dointvec, 1866 }, 1867 { 1868 .procname = "drop_entry", 1869 .maxlen = sizeof(int), 1870 .mode = 0644, 1871 .proc_handler = proc_do_defense_mode, 1872 }, 1873 { 1874 .procname = "drop_packet", 1875 .maxlen = sizeof(int), 1876 .mode = 0644, 1877 .proc_handler = 
proc_do_defense_mode, 1878 }, 1879 #ifdef CONFIG_IP_VS_NFCT 1880 { 1881 .procname = "conntrack", 1882 .maxlen = sizeof(int), 1883 .mode = 0644, 1884 .proc_handler = &proc_dointvec, 1885 }, 1886 #endif 1887 { 1888 .procname = "secure_tcp", 1889 .maxlen = sizeof(int), 1890 .mode = 0644, 1891 .proc_handler = proc_do_defense_mode, 1892 }, 1893 { 1894 .procname = "snat_reroute", 1895 .maxlen = sizeof(int), 1896 .mode = 0644, 1897 .proc_handler = &proc_dointvec, 1898 }, 1899 { 1900 .procname = "sync_version", 1901 .maxlen = sizeof(int), 1902 .mode = 0644, 1903 .proc_handler = proc_dointvec_minmax, 1904 .extra1 = SYSCTL_ZERO, 1905 .extra2 = SYSCTL_ONE, 1906 }, 1907 { 1908 .procname = "sync_ports", 1909 .maxlen = sizeof(int), 1910 .mode = 0644, 1911 .proc_handler = proc_do_sync_ports, 1912 }, 1913 { 1914 .procname = "sync_persist_mode", 1915 .maxlen = sizeof(int), 1916 .mode = 0644, 1917 .proc_handler = proc_dointvec, 1918 }, 1919 { 1920 .procname = "sync_qlen_max", 1921 .maxlen = sizeof(unsigned long), 1922 .mode = 0644, 1923 .proc_handler = proc_doulongvec_minmax, 1924 }, 1925 { 1926 .procname = "sync_sock_size", 1927 .maxlen = sizeof(int), 1928 .mode = 0644, 1929 .proc_handler = proc_dointvec, 1930 }, 1931 { 1932 .procname = "cache_bypass", 1933 .maxlen = sizeof(int), 1934 .mode = 0644, 1935 .proc_handler = proc_dointvec, 1936 }, 1937 { 1938 .procname = "expire_nodest_conn", 1939 .maxlen = sizeof(int), 1940 .mode = 0644, 1941 .proc_handler = proc_dointvec, 1942 }, 1943 { 1944 .procname = "sloppy_tcp", 1945 .maxlen = sizeof(int), 1946 .mode = 0644, 1947 .proc_handler = proc_dointvec, 1948 }, 1949 { 1950 .procname = "sloppy_sctp", 1951 .maxlen = sizeof(int), 1952 .mode = 0644, 1953 .proc_handler = proc_dointvec, 1954 }, 1955 { 1956 .procname = "expire_quiescent_template", 1957 .maxlen = sizeof(int), 1958 .mode = 0644, 1959 .proc_handler = proc_dointvec, 1960 }, 1961 { 1962 .procname = "sync_threshold", 1963 .maxlen = 1964 sizeof(((struct netns_ipvs 
*)0)->sysctl_sync_threshold), 1965 .mode = 0644, 1966 .proc_handler = proc_do_sync_threshold, 1967 }, 1968 { 1969 .procname = "sync_refresh_period", 1970 .maxlen = sizeof(int), 1971 .mode = 0644, 1972 .proc_handler = proc_dointvec_jiffies, 1973 }, 1974 { 1975 .procname = "sync_retries", 1976 .maxlen = sizeof(int), 1977 .mode = 0644, 1978 .proc_handler = proc_dointvec_minmax, 1979 .extra1 = SYSCTL_ZERO, 1980 .extra2 = &three, 1981 }, 1982 { 1983 .procname = "nat_icmp_send", 1984 .maxlen = sizeof(int), 1985 .mode = 0644, 1986 .proc_handler = proc_dointvec, 1987 }, 1988 { 1989 .procname = "pmtu_disc", 1990 .maxlen = sizeof(int), 1991 .mode = 0644, 1992 .proc_handler = proc_dointvec, 1993 }, 1994 { 1995 .procname = "backup_only", 1996 .maxlen = sizeof(int), 1997 .mode = 0644, 1998 .proc_handler = proc_dointvec, 1999 }, 2000 { 2001 .procname = "conn_reuse_mode", 2002 .maxlen = sizeof(int), 2003 .mode = 0644, 2004 .proc_handler = proc_dointvec, 2005 }, 2006 { 2007 .procname = "schedule_icmp", 2008 .maxlen = sizeof(int), 2009 .mode = 0644, 2010 .proc_handler = proc_dointvec, 2011 }, 2012 { 2013 .procname = "ignore_tunneled", 2014 .maxlen = sizeof(int), 2015 .mode = 0644, 2016 .proc_handler = proc_dointvec, 2017 }, 2018 { 2019 .procname = "run_estimation", 2020 .maxlen = sizeof(int), 2021 .mode = 0644, 2022 .proc_handler = proc_dointvec, 2023 }, 2024 #ifdef CONFIG_IP_VS_DEBUG 2025 { 2026 .procname = "debug_level", 2027 .data = &sysctl_ip_vs_debug_level, 2028 .maxlen = sizeof(int), 2029 .mode = 0644, 2030 .proc_handler = proc_dointvec, 2031 }, 2032 #endif 2033 { } 2034 }; 2035 2036 #endif 2037 2038 #ifdef CONFIG_PROC_FS 2039 2040 struct ip_vs_iter { 2041 struct seq_net_private p; /* Do not move this, netns depends upon it*/ 2042 struct hlist_head *table; 2043 int bucket; 2044 }; 2045 2046 /* 2047 * Write the contents of the VS rule table to a PROCfs file. 
2048 * (It is kept just for backward compatibility) 2049 */ 2050 static inline const char *ip_vs_fwd_name(unsigned int flags) 2051 { 2052 switch (flags & IP_VS_CONN_F_FWD_MASK) { 2053 case IP_VS_CONN_F_LOCALNODE: 2054 return "Local"; 2055 case IP_VS_CONN_F_TUNNEL: 2056 return "Tunnel"; 2057 case IP_VS_CONN_F_DROUTE: 2058 return "Route"; 2059 default: 2060 return "Masq"; 2061 } 2062 } 2063 2064 2065 /* Get the Nth entry in the two lists */ 2066 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 2067 { 2068 struct net *net = seq_file_net(seq); 2069 struct netns_ipvs *ipvs = net_ipvs(net); 2070 struct ip_vs_iter *iter = seq->private; 2071 int idx; 2072 struct ip_vs_service *svc; 2073 2074 /* look in hash by protocol */ 2075 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2076 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { 2077 if ((svc->ipvs == ipvs) && pos-- == 0) { 2078 iter->table = ip_vs_svc_table; 2079 iter->bucket = idx; 2080 return svc; 2081 } 2082 } 2083 } 2084 2085 /* keep looking in fwmark */ 2086 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2087 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], 2088 f_list) { 2089 if ((svc->ipvs == ipvs) && pos-- == 0) { 2090 iter->table = ip_vs_svc_fwm_table; 2091 iter->bucket = idx; 2092 return svc; 2093 } 2094 } 2095 } 2096 2097 return NULL; 2098 } 2099 2100 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 2101 __acquires(RCU) 2102 { 2103 rcu_read_lock(); 2104 return *pos ? 
ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 2105 } 2106 2107 2108 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2109 { 2110 struct hlist_node *e; 2111 struct ip_vs_iter *iter; 2112 struct ip_vs_service *svc; 2113 2114 ++*pos; 2115 if (v == SEQ_START_TOKEN) 2116 return ip_vs_info_array(seq,0); 2117 2118 svc = v; 2119 iter = seq->private; 2120 2121 if (iter->table == ip_vs_svc_table) { 2122 /* next service in table hashed by protocol */ 2123 e = rcu_dereference(hlist_next_rcu(&svc->s_list)); 2124 if (e) 2125 return hlist_entry(e, struct ip_vs_service, s_list); 2126 2127 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 2128 hlist_for_each_entry_rcu(svc, 2129 &ip_vs_svc_table[iter->bucket], 2130 s_list) { 2131 return svc; 2132 } 2133 } 2134 2135 iter->table = ip_vs_svc_fwm_table; 2136 iter->bucket = -1; 2137 goto scan_fwmark; 2138 } 2139 2140 /* next service in hashed by fwmark */ 2141 e = rcu_dereference(hlist_next_rcu(&svc->f_list)); 2142 if (e) 2143 return hlist_entry(e, struct ip_vs_service, f_list); 2144 2145 scan_fwmark: 2146 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 2147 hlist_for_each_entry_rcu(svc, 2148 &ip_vs_svc_fwm_table[iter->bucket], 2149 f_list) 2150 return svc; 2151 } 2152 2153 return NULL; 2154 } 2155 2156 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 2157 __releases(RCU) 2158 { 2159 rcu_read_unlock(); 2160 } 2161 2162 2163 static int ip_vs_info_seq_show(struct seq_file *seq, void *v) 2164 { 2165 if (v == SEQ_START_TOKEN) { 2166 seq_printf(seq, 2167 "IP Virtual Server version %d.%d.%d (size=%d)\n", 2168 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2169 seq_puts(seq, 2170 "Prot LocalAddress:Port Scheduler Flags\n"); 2171 seq_puts(seq, 2172 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 2173 } else { 2174 struct net *net = seq_file_net(seq); 2175 struct netns_ipvs *ipvs = net_ipvs(net); 2176 const struct ip_vs_service *svc = v; 2177 const struct ip_vs_iter *iter = 
seq->private; 2178 const struct ip_vs_dest *dest; 2179 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 2180 char *sched_name = sched ? sched->name : "none"; 2181 2182 if (svc->ipvs != ipvs) 2183 return 0; 2184 if (iter->table == ip_vs_svc_table) { 2185 #ifdef CONFIG_IP_VS_IPV6 2186 if (svc->af == AF_INET6) 2187 seq_printf(seq, "%s [%pI6]:%04X %s ", 2188 ip_vs_proto_name(svc->protocol), 2189 &svc->addr.in6, 2190 ntohs(svc->port), 2191 sched_name); 2192 else 2193 #endif 2194 seq_printf(seq, "%s %08X:%04X %s %s ", 2195 ip_vs_proto_name(svc->protocol), 2196 ntohl(svc->addr.ip), 2197 ntohs(svc->port), 2198 sched_name, 2199 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2200 } else { 2201 seq_printf(seq, "FWM %08X %s %s", 2202 svc->fwmark, sched_name, 2203 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2204 } 2205 2206 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 2207 seq_printf(seq, "persistent %d %08X\n", 2208 svc->timeout, 2209 ntohl(svc->netmask)); 2210 else 2211 seq_putc(seq, '\n'); 2212 2213 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 2214 #ifdef CONFIG_IP_VS_IPV6 2215 if (dest->af == AF_INET6) 2216 seq_printf(seq, 2217 " -> [%pI6]:%04X" 2218 " %-7s %-6d %-10d %-10d\n", 2219 &dest->addr.in6, 2220 ntohs(dest->port), 2221 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2222 atomic_read(&dest->weight), 2223 atomic_read(&dest->activeconns), 2224 atomic_read(&dest->inactconns)); 2225 else 2226 #endif 2227 seq_printf(seq, 2228 " -> %08X:%04X " 2229 "%-7s %-6d %-10d %-10d\n", 2230 ntohl(dest->addr.ip), 2231 ntohs(dest->port), 2232 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2233 atomic_read(&dest->weight), 2234 atomic_read(&dest->activeconns), 2235 atomic_read(&dest->inactconns)); 2236 2237 } 2238 } 2239 return 0; 2240 } 2241 2242 static const struct seq_operations ip_vs_info_seq_ops = { 2243 .start = ip_vs_info_seq_start, 2244 .next = ip_vs_info_seq_next, 2245 .stop = ip_vs_info_seq_stop, 2246 .show = ip_vs_info_seq_show, 2247 }; 2248 
2249 static int ip_vs_stats_show(struct seq_file *seq, void *v) 2250 { 2251 struct net *net = seq_file_single_net(seq); 2252 struct ip_vs_kstats show; 2253 2254 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2255 seq_puts(seq, 2256 " Total Incoming Outgoing Incoming Outgoing\n"); 2257 seq_puts(seq, 2258 " Conns Packets Packets Bytes Bytes\n"); 2259 2260 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); 2261 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", 2262 (unsigned long long)show.conns, 2263 (unsigned long long)show.inpkts, 2264 (unsigned long long)show.outpkts, 2265 (unsigned long long)show.inbytes, 2266 (unsigned long long)show.outbytes); 2267 2268 /* 01234567 01234567 01234567 0123456701234567 0123456701234567*/ 2269 seq_puts(seq, 2270 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2271 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n", 2272 (unsigned long long)show.cps, 2273 (unsigned long long)show.inpps, 2274 (unsigned long long)show.outpps, 2275 (unsigned long long)show.inbps, 2276 (unsigned long long)show.outbps); 2277 2278 return 0; 2279 } 2280 2281 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) 2282 { 2283 struct net *net = seq_file_single_net(seq); 2284 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; 2285 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; 2286 struct ip_vs_kstats kstats; 2287 int i; 2288 2289 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2290 seq_puts(seq, 2291 " Total Incoming Outgoing Incoming Outgoing\n"); 2292 seq_puts(seq, 2293 "CPU Conns Packets Packets Bytes Bytes\n"); 2294 2295 for_each_possible_cpu(i) { 2296 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); 2297 unsigned int start; 2298 u64 conns, inpkts, outpkts, inbytes, outbytes; 2299 2300 do { 2301 start = u64_stats_fetch_begin_irq(&u->syncp); 2302 conns = u->cnt.conns; 2303 inpkts = u->cnt.inpkts; 2304 outpkts = u->cnt.outpkts; 2305 inbytes = u->cnt.inbytes; 2306 outbytes = 
u->cnt.outbytes; 2307 } while (u64_stats_fetch_retry_irq(&u->syncp, start)); 2308 2309 seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", 2310 i, (u64)conns, (u64)inpkts, 2311 (u64)outpkts, (u64)inbytes, 2312 (u64)outbytes); 2313 } 2314 2315 ip_vs_copy_stats(&kstats, tot_stats); 2316 2317 seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n", 2318 (unsigned long long)kstats.conns, 2319 (unsigned long long)kstats.inpkts, 2320 (unsigned long long)kstats.outpkts, 2321 (unsigned long long)kstats.inbytes, 2322 (unsigned long long)kstats.outbytes); 2323 2324 /* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2325 seq_puts(seq, 2326 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2327 seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n", 2328 kstats.cps, 2329 kstats.inpps, 2330 kstats.outpps, 2331 kstats.inbps, 2332 kstats.outbps); 2333 2334 return 0; 2335 } 2336 #endif 2337 2338 /* 2339 * Set timeout values for tcp tcpfin udp in the timeout_table. 2340 */ 2341 static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2342 { 2343 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2344 struct ip_vs_proto_data *pd; 2345 #endif 2346 2347 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2348 u->tcp_timeout, 2349 u->tcp_fin_timeout, 2350 u->udp_timeout); 2351 2352 #ifdef CONFIG_IP_VS_PROTO_TCP 2353 if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || 2354 u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { 2355 return -EINVAL; 2356 } 2357 #endif 2358 2359 #ifdef CONFIG_IP_VS_PROTO_UDP 2360 if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) 2361 return -EINVAL; 2362 #endif 2363 2364 #ifdef CONFIG_IP_VS_PROTO_TCP 2365 if (u->tcp_timeout) { 2366 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2367 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] 2368 = u->tcp_timeout * HZ; 2369 } 2370 2371 if (u->tcp_fin_timeout) { 2372 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2373 
pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 2374 = u->tcp_fin_timeout * HZ; 2375 } 2376 #endif 2377 2378 #ifdef CONFIG_IP_VS_PROTO_UDP 2379 if (u->udp_timeout) { 2380 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); 2381 pd->timeout_table[IP_VS_UDP_S_NORMAL] 2382 = u->udp_timeout * HZ; 2383 } 2384 #endif 2385 return 0; 2386 } 2387 2388 #define CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2389 2390 struct ip_vs_svcdest_user { 2391 struct ip_vs_service_user s; 2392 struct ip_vs_dest_user d; 2393 }; 2394 2395 static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = { 2396 [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user), 2397 [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user), 2398 [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user), 2399 [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user), 2400 [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user), 2401 [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user), 2402 [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2403 [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user), 2404 [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user), 2405 [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user), 2406 }; 2407 2408 union ip_vs_set_arglen { 2409 struct ip_vs_service_user field_IP_VS_SO_SET_ADD; 2410 struct ip_vs_service_user field_IP_VS_SO_SET_EDIT; 2411 struct ip_vs_service_user field_IP_VS_SO_SET_DEL; 2412 struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST; 2413 struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST; 2414 struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST; 2415 struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT; 2416 struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON; 2417 struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON; 2418 struct ip_vs_service_user field_IP_VS_SO_SET_ZERO; 2419 }; 2420 2421 #define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen) 2422 2423 static void 
ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2424 struct ip_vs_service_user *usvc_compat) 2425 { 2426 memset(usvc, 0, sizeof(*usvc)); 2427 2428 usvc->af = AF_INET; 2429 usvc->protocol = usvc_compat->protocol; 2430 usvc->addr.ip = usvc_compat->addr; 2431 usvc->port = usvc_compat->port; 2432 usvc->fwmark = usvc_compat->fwmark; 2433 2434 /* Deep copy of sched_name is not needed here */ 2435 usvc->sched_name = usvc_compat->sched_name; 2436 2437 usvc->flags = usvc_compat->flags; 2438 usvc->timeout = usvc_compat->timeout; 2439 usvc->netmask = usvc_compat->netmask; 2440 } 2441 2442 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2443 struct ip_vs_dest_user *udest_compat) 2444 { 2445 memset(udest, 0, sizeof(*udest)); 2446 2447 udest->addr.ip = udest_compat->addr; 2448 udest->port = udest_compat->port; 2449 udest->conn_flags = udest_compat->conn_flags; 2450 udest->weight = udest_compat->weight; 2451 udest->u_threshold = udest_compat->u_threshold; 2452 udest->l_threshold = udest_compat->l_threshold; 2453 udest->af = AF_INET; 2454 udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP; 2455 } 2456 2457 static int 2458 do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) 2459 { 2460 struct net *net = sock_net(sk); 2461 int ret; 2462 unsigned char arg[MAX_SET_ARGLEN]; 2463 struct ip_vs_service_user *usvc_compat; 2464 struct ip_vs_service_user_kern usvc; 2465 struct ip_vs_service *svc; 2466 struct ip_vs_dest_user *udest_compat; 2467 struct ip_vs_dest_user_kern udest; 2468 struct netns_ipvs *ipvs = net_ipvs(net); 2469 2470 BUILD_BUG_ON(sizeof(arg) > 255); 2471 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2472 return -EPERM; 2473 2474 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX) 2475 return -EINVAL; 2476 if (len != set_arglen[CMDID(cmd)]) { 2477 IP_VS_DBG(1, "set_ctl: len %u != %u\n", 2478 len, set_arglen[CMDID(cmd)]); 2479 return -EINVAL; 2480 } 2481 2482 if (copy_from_sockptr(arg, ptr, len) != 0) 2483 
return -EFAULT;

	/* Handle daemons since they have another lock */
	if (cmd == IP_VS_SO_SET_STARTDAEMON ||
	    cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;

		if (cmd == IP_VS_SO_SET_STARTDAEMON) {
			struct ipvs_sync_daemon_cfg cfg;

			memset(&cfg, 0, sizeof(cfg));
			ret = -EINVAL;
			/* A strscpy() result <= 0 is rejected with -EINVAL */
			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
				    sizeof(cfg.mcast_ifn)) <= 0)
				return ret;
			cfg.syncid = dm->syncid;
			ret = start_sync_thread(ipvs, &cfg, dm->state);
		} else {
			ret = stop_sync_thread(ipvs, dm->state);
		}
		/* Daemon commands return without taking __ip_vs_mutex */
		return ret;
	}

	mutex_lock(&__ip_vs_mutex);
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush(ipvs, false);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	} else if (!len) {
		/* No more commands with len == 0 below */
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * arg starts with the service struct; the destination struct, where
	 * the command carries one, follows it directly.
	 */
	usvc_compat = (struct ip_vs_service_user *)arg;
	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);

	/* We only use the new structs internally, so copy userspace compat
	 * structs to extended internal versions */
	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
	ip_vs_copy_udest_compat(&udest, udest_compat);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
			ret = ip_vs_zero_all(ipvs);
			goto out_unlock;
		}
	}

	/*
	 * For ADD and EDIT the scheduler name must contain a NUL within
	 * IP_VS_SCHEDNAME_MAXLEN bytes.
	 */
	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
	    IP_VS_SCHEDNAME_MAXLEN) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2545 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && 2546 usvc.protocol != IPPROTO_SCTP) { 2547 pr_err("set_ctl: invalid protocol: %d %pI4:%d\n", 2548 usvc.protocol, &usvc.addr.ip, 2549 ntohs(usvc.port)); 2550 ret = -EFAULT; 2551 goto out_unlock; 2552 } 2553 2554 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2555 rcu_read_lock(); 2556 if (usvc.fwmark == 0) 2557 svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol, 2558 &usvc.addr, usvc.port); 2559 else 2560 svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark); 2561 rcu_read_unlock(); 2562 2563 if (cmd != IP_VS_SO_SET_ADD 2564 && (svc == NULL || svc->protocol != usvc.protocol)) { 2565 ret = -ESRCH; 2566 goto out_unlock; 2567 } 2568 2569 switch (cmd) { 2570 case IP_VS_SO_SET_ADD: 2571 if (svc != NULL) 2572 ret = -EEXIST; 2573 else 2574 ret = ip_vs_add_service(ipvs, &usvc, &svc); 2575 break; 2576 case IP_VS_SO_SET_EDIT: 2577 ret = ip_vs_edit_service(svc, &usvc); 2578 break; 2579 case IP_VS_SO_SET_DEL: 2580 ret = ip_vs_del_service(svc); 2581 if (!ret) 2582 goto out_unlock; 2583 break; 2584 case IP_VS_SO_SET_ZERO: 2585 ret = ip_vs_zero_service(svc); 2586 break; 2587 case IP_VS_SO_SET_ADDDEST: 2588 ret = ip_vs_add_dest(svc, &udest); 2589 break; 2590 case IP_VS_SO_SET_EDITDEST: 2591 ret = ip_vs_edit_dest(svc, &udest); 2592 break; 2593 case IP_VS_SO_SET_DELDEST: 2594 ret = ip_vs_del_dest(svc, &udest); 2595 } 2596 2597 out_unlock: 2598 mutex_unlock(&__ip_vs_mutex); 2599 return ret; 2600 } 2601 2602 2603 static void 2604 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2605 { 2606 struct ip_vs_scheduler *sched; 2607 struct ip_vs_kstats kstats; 2608 char *sched_name; 2609 2610 sched = rcu_dereference_protected(src->scheduler, 1); 2611 sched_name = sched ? 
sched->name : "none"; 2612 dst->protocol = src->protocol; 2613 dst->addr = src->addr.ip; 2614 dst->port = src->port; 2615 dst->fwmark = src->fwmark; 2616 strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); 2617 dst->flags = src->flags; 2618 dst->timeout = src->timeout / HZ; 2619 dst->netmask = src->netmask; 2620 dst->num_dests = src->num_dests; 2621 ip_vs_copy_stats(&kstats, &src->stats); 2622 ip_vs_export_stats_user(&dst->stats, &kstats); 2623 } 2624 2625 static inline int 2626 __ip_vs_get_service_entries(struct netns_ipvs *ipvs, 2627 const struct ip_vs_get_services *get, 2628 struct ip_vs_get_services __user *uptr) 2629 { 2630 int idx, count=0; 2631 struct ip_vs_service *svc; 2632 struct ip_vs_service_entry entry; 2633 int ret = 0; 2634 2635 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2636 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2637 /* Only expose IPv4 entries to old interface */ 2638 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2639 continue; 2640 2641 if (count >= get->num_services) 2642 goto out; 2643 memset(&entry, 0, sizeof(entry)); 2644 ip_vs_copy_service(&entry, svc); 2645 if (copy_to_user(&uptr->entrytable[count], 2646 &entry, sizeof(entry))) { 2647 ret = -EFAULT; 2648 goto out; 2649 } 2650 count++; 2651 } 2652 } 2653 2654 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2655 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2656 /* Only expose IPv4 entries to old interface */ 2657 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2658 continue; 2659 2660 if (count >= get->num_services) 2661 goto out; 2662 memset(&entry, 0, sizeof(entry)); 2663 ip_vs_copy_service(&entry, svc); 2664 if (copy_to_user(&uptr->entrytable[count], 2665 &entry, sizeof(entry))) { 2666 ret = -EFAULT; 2667 goto out; 2668 } 2669 count++; 2670 } 2671 } 2672 out: 2673 return ret; 2674 } 2675 2676 static inline int 2677 __ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get, 2678 struct ip_vs_get_dests __user 
*uptr) 2679 { 2680 struct ip_vs_service *svc; 2681 union nf_inet_addr addr = { .ip = get->addr }; 2682 int ret = 0; 2683 2684 rcu_read_lock(); 2685 if (get->fwmark) 2686 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark); 2687 else 2688 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr, 2689 get->port); 2690 rcu_read_unlock(); 2691 2692 if (svc) { 2693 int count = 0; 2694 struct ip_vs_dest *dest; 2695 struct ip_vs_dest_entry entry; 2696 struct ip_vs_kstats kstats; 2697 2698 memset(&entry, 0, sizeof(entry)); 2699 list_for_each_entry(dest, &svc->destinations, n_list) { 2700 if (count >= get->num_dests) 2701 break; 2702 2703 /* Cannot expose heterogeneous members via sockopt 2704 * interface 2705 */ 2706 if (dest->af != svc->af) 2707 continue; 2708 2709 entry.addr = dest->addr.ip; 2710 entry.port = dest->port; 2711 entry.conn_flags = atomic_read(&dest->conn_flags); 2712 entry.weight = atomic_read(&dest->weight); 2713 entry.u_threshold = dest->u_threshold; 2714 entry.l_threshold = dest->l_threshold; 2715 entry.activeconns = atomic_read(&dest->activeconns); 2716 entry.inactconns = atomic_read(&dest->inactconns); 2717 entry.persistconns = atomic_read(&dest->persistconns); 2718 ip_vs_copy_stats(&kstats, &dest->stats); 2719 ip_vs_export_stats_user(&entry.stats, &kstats); 2720 if (copy_to_user(&uptr->entrytable[count], 2721 &entry, sizeof(entry))) { 2722 ret = -EFAULT; 2723 break; 2724 } 2725 count++; 2726 } 2727 } else 2728 ret = -ESRCH; 2729 return ret; 2730 } 2731 2732 static inline void 2733 __ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2734 { 2735 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2736 struct ip_vs_proto_data *pd; 2737 #endif 2738 2739 memset(u, 0, sizeof (*u)); 2740 2741 #ifdef CONFIG_IP_VS_PROTO_TCP 2742 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2743 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2744 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 
/ HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
	pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
	u->udp_timeout =
			pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}

/*
 * Number of bytes copied in from user space for each get command, indexed
 * by CMDID(). Stored as unsigned char; commands without an entry get 0.
 */
static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
	[CMDID(IP_VS_SO_GET_VERSION)]  = 64,
	[CMDID(IP_VS_SO_GET_INFO)]     = sizeof(struct ip_vs_getinfo),
	[CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
	[CMDID(IP_VS_SO_GET_SERVICE)]  = sizeof(struct ip_vs_service_entry),
	[CMDID(IP_VS_SO_GET_DESTS)]    = sizeof(struct ip_vs_get_dests),
	[CMDID(IP_VS_SO_GET_TIMEOUT)]  = sizeof(struct ip_vs_timeout_user),
	[CMDID(IP_VS_SO_GET_DAEMON)]   = 2 * sizeof(struct ip_vs_daemon_user),
};

/*
 * One member per get_arglen entry above; the union's size is therefore the
 * size of the largest get argument.
 */
union ip_vs_get_arglen {
	char				field_IP_VS_SO_GET_VERSION[64];
	struct ip_vs_getinfo		field_IP_VS_SO_GET_INFO;
	struct ip_vs_get_services	field_IP_VS_SO_GET_SERVICES;
	struct ip_vs_service_entry	field_IP_VS_SO_GET_SERVICE;
	struct ip_vs_get_dests		field_IP_VS_SO_GET_DESTS;
	struct ip_vs_timeout_user	field_IP_VS_SO_GET_TIMEOUT;
	struct ip_vs_daemon_user	field_IP_VS_SO_GET_DAEMON[2];
};

/* Size of the on-stack argument buffer used by do_ip_vs_get_ctl() */
#define MAX_GET_ARGLEN	sizeof(union ip_vs_get_arglen)

/*
 * Handler for the IPVS "get" socket options (installed as .get in
 * ip_vs_sockopts).
 *
 * Returns -EPERM without CAP_NET_ADMIN in the socket's network namespace,
 * -EINVAL for a command outside [IP_VS_BASE_CTL, IP_VS_SO_GET_MAX] or when
 * *len is smaller than get_arglen[], -EFAULT if user memory cannot be
 * accessed, otherwise the result of the selected query.
 */
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	unsigned char arg[MAX_GET_ARGLEN];
	int ret = 0;
	unsigned int copylen;
	struct net *net = sock_net(sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* NOTE(review): net was already handed to net_ipvs() above */
	BUG_ON(!net);
	/* get_arglen[] holds its lengths in unsigned char */
	BUILD_BUG_ON(sizeof(arg) > 255);
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
		return -EINVAL;

	/* The caller's buffer must hold at least the fixed-size argument */
	copylen = get_arglen[CMDID(cmd)];
	if (*len < (int) copylen) {
		IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, copylen) != 0)
		return -EFAULT;
	/*
	 *
Handle daemons first since it has its own locking 2802 */ 2803 if (cmd == IP_VS_SO_GET_DAEMON) { 2804 struct ip_vs_daemon_user d[2]; 2805 2806 memset(&d, 0, sizeof(d)); 2807 mutex_lock(&ipvs->sync_mutex); 2808 if (ipvs->sync_state & IP_VS_STATE_MASTER) { 2809 d[0].state = IP_VS_STATE_MASTER; 2810 strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, 2811 sizeof(d[0].mcast_ifn)); 2812 d[0].syncid = ipvs->mcfg.syncid; 2813 } 2814 if (ipvs->sync_state & IP_VS_STATE_BACKUP) { 2815 d[1].state = IP_VS_STATE_BACKUP; 2816 strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, 2817 sizeof(d[1].mcast_ifn)); 2818 d[1].syncid = ipvs->bcfg.syncid; 2819 } 2820 if (copy_to_user(user, &d, sizeof(d)) != 0) 2821 ret = -EFAULT; 2822 mutex_unlock(&ipvs->sync_mutex); 2823 return ret; 2824 } 2825 2826 mutex_lock(&__ip_vs_mutex); 2827 switch (cmd) { 2828 case IP_VS_SO_GET_VERSION: 2829 { 2830 char buf[64]; 2831 2832 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2833 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2834 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2835 ret = -EFAULT; 2836 goto out; 2837 } 2838 *len = strlen(buf)+1; 2839 } 2840 break; 2841 2842 case IP_VS_SO_GET_INFO: 2843 { 2844 struct ip_vs_getinfo info; 2845 info.version = IP_VS_VERSION_CODE; 2846 info.size = ip_vs_conn_tab_size; 2847 info.num_services = ipvs->num_services; 2848 if (copy_to_user(user, &info, sizeof(info)) != 0) 2849 ret = -EFAULT; 2850 } 2851 break; 2852 2853 case IP_VS_SO_GET_SERVICES: 2854 { 2855 struct ip_vs_get_services *get; 2856 int size; 2857 2858 get = (struct ip_vs_get_services *)arg; 2859 size = struct_size(get, entrytable, get->num_services); 2860 if (*len != size) { 2861 pr_err("length: %u != %u\n", *len, size); 2862 ret = -EINVAL; 2863 goto out; 2864 } 2865 ret = __ip_vs_get_service_entries(ipvs, get, user); 2866 } 2867 break; 2868 2869 case IP_VS_SO_GET_SERVICE: 2870 { 2871 struct ip_vs_service_entry *entry; 2872 struct ip_vs_service *svc; 2873 union nf_inet_addr addr; 2874 2875 
entry = (struct ip_vs_service_entry *)arg; 2876 addr.ip = entry->addr; 2877 rcu_read_lock(); 2878 if (entry->fwmark) 2879 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark); 2880 else 2881 svc = __ip_vs_service_find(ipvs, AF_INET, 2882 entry->protocol, &addr, 2883 entry->port); 2884 rcu_read_unlock(); 2885 if (svc) { 2886 ip_vs_copy_service(entry, svc); 2887 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2888 ret = -EFAULT; 2889 } else 2890 ret = -ESRCH; 2891 } 2892 break; 2893 2894 case IP_VS_SO_GET_DESTS: 2895 { 2896 struct ip_vs_get_dests *get; 2897 int size; 2898 2899 get = (struct ip_vs_get_dests *)arg; 2900 size = struct_size(get, entrytable, get->num_dests); 2901 if (*len != size) { 2902 pr_err("length: %u != %u\n", *len, size); 2903 ret = -EINVAL; 2904 goto out; 2905 } 2906 ret = __ip_vs_get_dest_entries(ipvs, get, user); 2907 } 2908 break; 2909 2910 case IP_VS_SO_GET_TIMEOUT: 2911 { 2912 struct ip_vs_timeout_user t; 2913 2914 __ip_vs_get_timeouts(ipvs, &t); 2915 if (copy_to_user(user, &t, sizeof(t)) != 0) 2916 ret = -EFAULT; 2917 } 2918 break; 2919 2920 default: 2921 ret = -EINVAL; 2922 } 2923 2924 out: 2925 mutex_unlock(&__ip_vs_mutex); 2926 return ret; 2927 } 2928 2929 2930 static struct nf_sockopt_ops ip_vs_sockopts = { 2931 .pf = PF_INET, 2932 .set_optmin = IP_VS_BASE_CTL, 2933 .set_optmax = IP_VS_SO_SET_MAX+1, 2934 .set = do_ip_vs_set_ctl, 2935 .get_optmin = IP_VS_BASE_CTL, 2936 .get_optmax = IP_VS_SO_GET_MAX+1, 2937 .get = do_ip_vs_get_ctl, 2938 .owner = THIS_MODULE, 2939 }; 2940 2941 /* 2942 * Generic Netlink interface 2943 */ 2944 2945 /* IPVS genetlink family */ 2946 static struct genl_family ip_vs_genl_family; 2947 2948 /* Policy used for first-level command attributes */ 2949 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2950 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, 2951 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, 2952 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, 2953 
[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	/* Name limit is one less than IP_VS_IFNAME_MAXLEN */
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN - 1 },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_SYNC_MAXLEN]	= { .type = NLA_U16 },
	[IPVS_DAEMON_ATTR_MCAST_GROUP]	= { .type = NLA_U32 },
	/* No .type is given for this entry, only a length */
	[IPVS_DAEMON_ATTR_MCAST_GROUP6]	= { .len = sizeof(struct in6_addr) },
	[IPVS_DAEMON_ATTR_MCAST_PORT]	= { .type = NLA_U16 },
	[IPVS_DAEMON_ATTR_MCAST_TTL]	= { .type = NLA_U8 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
	/*
	 * NOTE(review): unlike the other name attributes, this limit is not
	 * reduced by one; confirm that this is intended.
	 */
	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IP_VS_PENAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
.len = sizeof(union nf_inet_addr) }, 2994 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, 2995 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, 2996 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, 2997 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, 2998 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, 2999 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, 3000 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, 3001 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, 3002 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, 3003 [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, 3004 [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 }, 3005 [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 }, 3006 [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 }, 3007 }; 3008 3009 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 3010 struct ip_vs_kstats *kstats) 3011 { 3012 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); 3013 3014 if (!nl_stats) 3015 return -EMSGSIZE; 3016 3017 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || 3018 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || 3019 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || 3020 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 3021 IPVS_STATS_ATTR_PAD) || 3022 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 3023 IPVS_STATS_ATTR_PAD) || 3024 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || 3025 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || 3026 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || 3027 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) || 3028 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps)) 3029 goto nla_put_failure; 3030 nla_nest_end(skb, nl_stats); 3031 3032 return 0; 3033 3034 nla_put_failure: 3035 nla_nest_cancel(skb, nl_stats); 3036 return -EMSGSIZE; 3037 } 3038 3039 static int ip_vs_genl_fill_stats64(struct sk_buff 
*skb, int container_type, 3040 struct ip_vs_kstats *kstats) 3041 { 3042 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); 3043 3044 if (!nl_stats) 3045 return -EMSGSIZE; 3046 3047 if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns, 3048 IPVS_STATS_ATTR_PAD) || 3049 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts, 3050 IPVS_STATS_ATTR_PAD) || 3051 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts, 3052 IPVS_STATS_ATTR_PAD) || 3053 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 3054 IPVS_STATS_ATTR_PAD) || 3055 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 3056 IPVS_STATS_ATTR_PAD) || 3057 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps, 3058 IPVS_STATS_ATTR_PAD) || 3059 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps, 3060 IPVS_STATS_ATTR_PAD) || 3061 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps, 3062 IPVS_STATS_ATTR_PAD) || 3063 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps, 3064 IPVS_STATS_ATTR_PAD) || 3065 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps, 3066 IPVS_STATS_ATTR_PAD)) 3067 goto nla_put_failure; 3068 nla_nest_end(skb, nl_stats); 3069 3070 return 0; 3071 3072 nla_put_failure: 3073 nla_nest_cancel(skb, nl_stats); 3074 return -EMSGSIZE; 3075 } 3076 3077 static int ip_vs_genl_fill_service(struct sk_buff *skb, 3078 struct ip_vs_service *svc) 3079 { 3080 struct ip_vs_scheduler *sched; 3081 struct ip_vs_pe *pe; 3082 struct nlattr *nl_service; 3083 struct ip_vs_flags flags = { .flags = svc->flags, 3084 .mask = ~0 }; 3085 struct ip_vs_kstats kstats; 3086 char *sched_name; 3087 3088 nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE); 3089 if (!nl_service) 3090 return -EMSGSIZE; 3091 3092 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af)) 3093 goto nla_put_failure; 3094 if (svc->fwmark) { 3095 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark)) 3096 goto nla_put_failure; 3097 } else { 
3098 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || 3099 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || 3100 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) 3101 goto nla_put_failure; 3102 } 3103 3104 sched = rcu_dereference_protected(svc->scheduler, 1); 3105 sched_name = sched ? sched->name : "none"; 3106 pe = rcu_dereference_protected(svc->pe, 1); 3107 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || 3108 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || 3109 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 3110 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || 3111 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) 3112 goto nla_put_failure; 3113 ip_vs_copy_stats(&kstats, &svc->stats); 3114 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats)) 3115 goto nla_put_failure; 3116 if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats)) 3117 goto nla_put_failure; 3118 3119 nla_nest_end(skb, nl_service); 3120 3121 return 0; 3122 3123 nla_put_failure: 3124 nla_nest_cancel(skb, nl_service); 3125 return -EMSGSIZE; 3126 } 3127 3128 static int ip_vs_genl_dump_service(struct sk_buff *skb, 3129 struct ip_vs_service *svc, 3130 struct netlink_callback *cb) 3131 { 3132 void *hdr; 3133 3134 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3135 &ip_vs_genl_family, NLM_F_MULTI, 3136 IPVS_CMD_NEW_SERVICE); 3137 if (!hdr) 3138 return -EMSGSIZE; 3139 3140 if (ip_vs_genl_fill_service(skb, svc) < 0) 3141 goto nla_put_failure; 3142 3143 genlmsg_end(skb, hdr); 3144 return 0; 3145 3146 nla_put_failure: 3147 genlmsg_cancel(skb, hdr); 3148 return -EMSGSIZE; 3149 } 3150 3151 static int ip_vs_genl_dump_services(struct sk_buff *skb, 3152 struct netlink_callback *cb) 3153 { 3154 int idx = 0, i; 3155 int start = cb->args[0]; 3156 struct ip_vs_service *svc; 3157 struct net *net = sock_net(skb->sk); 3158 struct netns_ipvs *ipvs = net_ipvs(net); 3159 3160 
mutex_lock(&__ip_vs_mutex); 3161 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 3162 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 3163 if (++idx <= start || (svc->ipvs != ipvs)) 3164 continue; 3165 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 3166 idx--; 3167 goto nla_put_failure; 3168 } 3169 } 3170 } 3171 3172 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 3173 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 3174 if (++idx <= start || (svc->ipvs != ipvs)) 3175 continue; 3176 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 3177 idx--; 3178 goto nla_put_failure; 3179 } 3180 } 3181 } 3182 3183 nla_put_failure: 3184 mutex_unlock(&__ip_vs_mutex); 3185 cb->args[0] = idx; 3186 3187 return skb->len; 3188 } 3189 3190 static bool ip_vs_is_af_valid(int af) 3191 { 3192 if (af == AF_INET) 3193 return true; 3194 #ifdef CONFIG_IP_VS_IPV6 3195 if (af == AF_INET6 && ipv6_mod_enabled()) 3196 return true; 3197 #endif 3198 return false; 3199 } 3200 3201 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, 3202 struct ip_vs_service_user_kern *usvc, 3203 struct nlattr *nla, bool full_entry, 3204 struct ip_vs_service **ret_svc) 3205 { 3206 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 3207 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 3208 struct ip_vs_service *svc; 3209 3210 /* Parse mandatory identifying service fields first */ 3211 if (nla == NULL || 3212 nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL)) 3213 return -EINVAL; 3214 3215 nla_af = attrs[IPVS_SVC_ATTR_AF]; 3216 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; 3217 nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; 3218 nla_port = attrs[IPVS_SVC_ATTR_PORT]; 3219 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; 3220 3221 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) 3222 return -EINVAL; 3223 3224 memset(usvc, 0, sizeof(*usvc)); 3225 3226 usvc->af = nla_get_u16(nla_af); 3227 if (!ip_vs_is_af_valid(usvc->af)) 3228 return 
-EAFNOSUPPORT;

	/* A fwmark service gets IPPROTO_TCP as its protocol value */
	if (nla_fwmark) {
		usvc->protocol = IPPROTO_TCP;
		usvc->fwmark = nla_get_u32(nla_fwmark);
	} else {
		usvc->protocol = nla_get_u16(nla_protocol);
		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
		usvc->port = nla_get_be16(nla_port);
		usvc->fwmark = 0;
	}

	rcu_read_lock();
	if (usvc->fwmark)
		svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
	else
		svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
					   &usvc->addr, usvc->port);
	rcu_read_unlock();
	/* May be NULL: a missing service is not an error at this point */
	*ret_svc = svc;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
			      *nla_netmask;
		struct ip_vs_flags flags;

		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
		nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];

		/* The PE name is the only optional attribute here */
		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
			return -EINVAL;

		nla_memcpy(&flags, nla_flags, sizeof(flags));

		/* prefill flags from service if it already exists */
		if (svc)
			usvc->flags = svc->flags;

		/* set new flags from userland */
		usvc->flags = (usvc->flags & ~flags.mask) |
			      (flags.flags & flags.mask);
		/* Both names point into the attribute data, no copy is made */
		usvc->sched_name = nla_data(nla_sched);
		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
		usvc->timeout = nla_get_u32(nla_timeout);
		usvc->netmask = nla_get_be32(nla_netmask);
	}

	return 0;
}

/*
 * Look up the service identified by the nested attribute @nla.
 * Returns the service, NULL if none matches, or an ERR_PTR() carrying the
 * error from ip_vs_genl_parse_service().
 */
static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
						     struct nlattr *nla)
{
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	int ret;

	ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
	return ret ?
ERR_PTR(ret) : svc; 3291 } 3292 3293 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 3294 { 3295 struct nlattr *nl_dest; 3296 struct ip_vs_kstats kstats; 3297 3298 nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST); 3299 if (!nl_dest) 3300 return -EMSGSIZE; 3301 3302 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || 3303 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || 3304 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, 3305 (atomic_read(&dest->conn_flags) & 3306 IP_VS_CONN_F_FWD_MASK)) || 3307 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, 3308 atomic_read(&dest->weight)) || 3309 nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE, 3310 dest->tun_type) || 3311 nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT, 3312 dest->tun_port) || 3313 nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS, 3314 dest->tun_flags) || 3315 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || 3316 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || 3317 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, 3318 atomic_read(&dest->activeconns)) || 3319 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, 3320 atomic_read(&dest->inactconns)) || 3321 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, 3322 atomic_read(&dest->persistconns)) || 3323 nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) 3324 goto nla_put_failure; 3325 ip_vs_copy_stats(&kstats, &dest->stats); 3326 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats)) 3327 goto nla_put_failure; 3328 if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats)) 3329 goto nla_put_failure; 3330 3331 nla_nest_end(skb, nl_dest); 3332 3333 return 0; 3334 3335 nla_put_failure: 3336 nla_nest_cancel(skb, nl_dest); 3337 return -EMSGSIZE; 3338 } 3339 3340 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, 3341 struct netlink_callback *cb) 3342 { 3343 void *hdr; 3344 3345 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3346 &ip_vs_genl_family, 
NLM_F_MULTI, 3347 IPVS_CMD_NEW_DEST); 3348 if (!hdr) 3349 return -EMSGSIZE; 3350 3351 if (ip_vs_genl_fill_dest(skb, dest) < 0) 3352 goto nla_put_failure; 3353 3354 genlmsg_end(skb, hdr); 3355 return 0; 3356 3357 nla_put_failure: 3358 genlmsg_cancel(skb, hdr); 3359 return -EMSGSIZE; 3360 } 3361 3362 static int ip_vs_genl_dump_dests(struct sk_buff *skb, 3363 struct netlink_callback *cb) 3364 { 3365 int idx = 0; 3366 int start = cb->args[0]; 3367 struct ip_vs_service *svc; 3368 struct ip_vs_dest *dest; 3369 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 3370 struct net *net = sock_net(skb->sk); 3371 struct netns_ipvs *ipvs = net_ipvs(net); 3372 3373 mutex_lock(&__ip_vs_mutex); 3374 3375 /* Try to find the service for which to dump destinations */ 3376 if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) 3377 goto out_err; 3378 3379 3380 svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); 3381 if (IS_ERR_OR_NULL(svc)) 3382 goto out_err; 3383 3384 /* Dump the destinations */ 3385 list_for_each_entry(dest, &svc->destinations, n_list) { 3386 if (++idx <= start) 3387 continue; 3388 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { 3389 idx--; 3390 goto nla_put_failure; 3391 } 3392 } 3393 3394 nla_put_failure: 3395 cb->args[0] = idx; 3396 3397 out_err: 3398 mutex_unlock(&__ip_vs_mutex); 3399 3400 return skb->len; 3401 } 3402 3403 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, 3404 struct nlattr *nla, bool full_entry) 3405 { 3406 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; 3407 struct nlattr *nla_addr, *nla_port; 3408 struct nlattr *nla_addr_family; 3409 3410 /* Parse mandatory identifying destination fields first */ 3411 if (nla == NULL || 3412 nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL)) 3413 return -EINVAL; 3414 3415 nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; 3416 nla_port = attrs[IPVS_DEST_ATTR_PORT]; 3417 nla_addr_family = 
attrs[IPVS_DEST_ATTR_ADDR_FAMILY];

	if (!(nla_addr && nla_port))
		return -EINVAL;

	memset(udest, 0, sizeof(*udest));

	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
	udest->port = nla_get_be16(nla_port);

	/* The family is left at 0 when the attribute is absent */
	if (nla_addr_family)
		udest->af = nla_get_u16(nla_addr_family);
	else
		udest->af = 0;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
			      *nla_l_thresh, *nla_tun_type, *nla_tun_port,
			      *nla_tun_flags;

		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
		nla_tun_type	= attrs[IPVS_DEST_ATTR_TUN_TYPE];
		nla_tun_port	= attrs[IPVS_DEST_ATTR_TUN_PORT];
		nla_tun_flags	= attrs[IPVS_DEST_ATTR_TUN_FLAGS];

		/* The three tunnel attributes are optional */
		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
			return -EINVAL;

		/* Only the forwarding-method bits are taken from the caller */
		udest->conn_flags = nla_get_u32(nla_fwd)
				    & IP_VS_CONN_F_FWD_MASK;
		udest->weight = nla_get_u32(nla_weight);
		udest->u_threshold = nla_get_u32(nla_u_thresh);
		udest->l_threshold = nla_get_u32(nla_l_thresh);

		if (nla_tun_type)
			udest->tun_type = nla_get_u8(nla_tun_type);

		if (nla_tun_port)
			udest->tun_port = nla_get_be16(nla_tun_port);

		if (nla_tun_flags)
			udest->tun_flags = nla_get_u16(nla_tun_flags);
	}

	return 0;
}

/*
 * Append the sync daemon configuration @c with the given @state to @skb as
 * a nested IPVS_CMD_ATTR_DAEMON attribute.
 * Returns 0, or -EMSGSIZE after cancelling the nest.
 */
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
				  struct ipvs_sync_daemon_cfg *c)
{
	struct nlattr *nl_daemon;

	nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON);
	if (!nl_daemon)
		return -EMSGSIZE;

	if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID,
c->syncid) || 3480 nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) || 3481 nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) || 3482 nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl)) 3483 goto nla_put_failure; 3484 #ifdef CONFIG_IP_VS_IPV6 3485 if (c->mcast_af == AF_INET6) { 3486 if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6, 3487 &c->mcast_group.in6)) 3488 goto nla_put_failure; 3489 } else 3490 #endif 3491 if (c->mcast_af == AF_INET && 3492 nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP, 3493 c->mcast_group.ip)) 3494 goto nla_put_failure; 3495 nla_nest_end(skb, nl_daemon); 3496 3497 return 0; 3498 3499 nla_put_failure: 3500 nla_nest_cancel(skb, nl_daemon); 3501 return -EMSGSIZE; 3502 } 3503 3504 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, 3505 struct ipvs_sync_daemon_cfg *c, 3506 struct netlink_callback *cb) 3507 { 3508 void *hdr; 3509 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3510 &ip_vs_genl_family, NLM_F_MULTI, 3511 IPVS_CMD_NEW_DAEMON); 3512 if (!hdr) 3513 return -EMSGSIZE; 3514 3515 if (ip_vs_genl_fill_daemon(skb, state, c)) 3516 goto nla_put_failure; 3517 3518 genlmsg_end(skb, hdr); 3519 return 0; 3520 3521 nla_put_failure: 3522 genlmsg_cancel(skb, hdr); 3523 return -EMSGSIZE; 3524 } 3525 3526 static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3527 struct netlink_callback *cb) 3528 { 3529 struct net *net = sock_net(skb->sk); 3530 struct netns_ipvs *ipvs = net_ipvs(net); 3531 3532 mutex_lock(&ipvs->sync_mutex); 3533 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3534 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3535 &ipvs->mcfg, cb) < 0) 3536 goto nla_put_failure; 3537 3538 cb->args[0] = 1; 3539 } 3540 3541 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3542 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3543 &ipvs->bcfg, cb) < 0) 3544 goto nla_put_failure; 3545 3546 cb->args[1] = 1; 3547 } 3548 3549 
nla_put_failure: 3550 mutex_unlock(&ipvs->sync_mutex); 3551 3552 return skb->len; 3553 } 3554 3555 static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) 3556 { 3557 struct ipvs_sync_daemon_cfg c; 3558 struct nlattr *a; 3559 int ret; 3560 3561 memset(&c, 0, sizeof(c)); 3562 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3563 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3564 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3565 return -EINVAL; 3566 strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3567 sizeof(c.mcast_ifn)); 3568 c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]); 3569 3570 a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN]; 3571 if (a) 3572 c.sync_maxlen = nla_get_u16(a); 3573 3574 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP]; 3575 if (a) { 3576 c.mcast_af = AF_INET; 3577 c.mcast_group.ip = nla_get_in_addr(a); 3578 if (!ipv4_is_multicast(c.mcast_group.ip)) 3579 return -EINVAL; 3580 } else { 3581 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6]; 3582 if (a) { 3583 #ifdef CONFIG_IP_VS_IPV6 3584 int addr_type; 3585 3586 c.mcast_af = AF_INET6; 3587 c.mcast_group.in6 = nla_get_in6_addr(a); 3588 addr_type = ipv6_addr_type(&c.mcast_group.in6); 3589 if (!(addr_type & IPV6_ADDR_MULTICAST)) 3590 return -EINVAL; 3591 #else 3592 return -EAFNOSUPPORT; 3593 #endif 3594 } 3595 } 3596 3597 a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT]; 3598 if (a) 3599 c.mcast_port = nla_get_u16(a); 3600 3601 a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL]; 3602 if (a) 3603 c.mcast_ttl = nla_get_u8(a); 3604 3605 /* The synchronization protocol is incompatible with mixed family 3606 * services 3607 */ 3608 if (ipvs->mixed_address_family_dests > 0) 3609 return -EINVAL; 3610 3611 ret = start_sync_thread(ipvs, &c, 3612 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3613 return ret; 3614 } 3615 3616 static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) 3617 { 3618 int ret; 3619 3620 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3621 return -EINVAL; 3622 3623 ret = stop_sync_thread(ipvs, 3624 
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3625 return ret; 3626 } 3627 3628 static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs) 3629 { 3630 struct ip_vs_timeout_user t; 3631 3632 __ip_vs_get_timeouts(ipvs, &t); 3633 3634 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3635 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 3636 3637 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) 3638 t.tcp_fin_timeout = 3639 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); 3640 3641 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3642 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3643 3644 return ip_vs_set_timeout(ipvs, &t); 3645 } 3646 3647 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) 3648 { 3649 int ret = -EINVAL, cmd; 3650 struct net *net = sock_net(skb->sk); 3651 struct netns_ipvs *ipvs = net_ipvs(net); 3652 3653 cmd = info->genlhdr->cmd; 3654 3655 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { 3656 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; 3657 3658 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || 3659 nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack)) 3660 goto out; 3661 3662 if (cmd == IPVS_CMD_NEW_DAEMON) 3663 ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs); 3664 else 3665 ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs); 3666 } 3667 3668 out: 3669 return ret; 3670 } 3671 3672 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 3673 { 3674 bool need_full_svc = false, need_full_dest = false; 3675 struct ip_vs_service *svc = NULL; 3676 struct ip_vs_service_user_kern usvc; 3677 struct ip_vs_dest_user_kern udest; 3678 int ret = 0, cmd; 3679 struct net *net = sock_net(skb->sk); 3680 struct netns_ipvs *ipvs = net_ipvs(net); 3681 3682 cmd = info->genlhdr->cmd; 3683 3684 mutex_lock(&__ip_vs_mutex); 3685 3686 if (cmd == IPVS_CMD_FLUSH) { 3687 ret = ip_vs_flush(ipvs, false); 3688 goto 
out; 3689 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3690 ret = ip_vs_genl_set_config(ipvs, info->attrs); 3691 goto out; 3692 } else if (cmd == IPVS_CMD_ZERO && 3693 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3694 ret = ip_vs_zero_all(ipvs); 3695 goto out; 3696 } 3697 3698 /* All following commands require a service argument, so check if we 3699 * received a valid one. We need a full service specification when 3700 * adding / editing a service. Only identifying members otherwise. */ 3701 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3702 need_full_svc = true; 3703 3704 ret = ip_vs_genl_parse_service(ipvs, &usvc, 3705 info->attrs[IPVS_CMD_ATTR_SERVICE], 3706 need_full_svc, &svc); 3707 if (ret) 3708 goto out; 3709 3710 /* Unless we're adding a new service, the service must already exist */ 3711 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3712 ret = -ESRCH; 3713 goto out; 3714 } 3715 3716 /* Destination commands require a valid destination argument. For 3717 * adding / editing a destination, we need a full destination 3718 * specification. */ 3719 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || 3720 cmd == IPVS_CMD_DEL_DEST) { 3721 if (cmd != IPVS_CMD_DEL_DEST) 3722 need_full_dest = true; 3723 3724 ret = ip_vs_genl_parse_dest(&udest, 3725 info->attrs[IPVS_CMD_ATTR_DEST], 3726 need_full_dest); 3727 if (ret) 3728 goto out; 3729 3730 /* Old protocols did not allow the user to specify address 3731 * family, so we set it to zero instead. We also didn't 3732 * allow heterogeneous pools in the old code, so it's safe 3733 * to assume that this will have the same address family as 3734 * the service. 
3735 */ 3736 if (udest.af == 0) 3737 udest.af = svc->af; 3738 3739 if (!ip_vs_is_af_valid(udest.af)) { 3740 ret = -EAFNOSUPPORT; 3741 goto out; 3742 } 3743 3744 if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) { 3745 /* The synchronization protocol is incompatible 3746 * with mixed family services 3747 */ 3748 if (ipvs->sync_state) { 3749 ret = -EINVAL; 3750 goto out; 3751 } 3752 3753 /* Which connection types do we support? */ 3754 switch (udest.conn_flags) { 3755 case IP_VS_CONN_F_TUNNEL: 3756 /* We are able to forward this */ 3757 break; 3758 default: 3759 ret = -EINVAL; 3760 goto out; 3761 } 3762 } 3763 } 3764 3765 switch (cmd) { 3766 case IPVS_CMD_NEW_SERVICE: 3767 if (svc == NULL) 3768 ret = ip_vs_add_service(ipvs, &usvc, &svc); 3769 else 3770 ret = -EEXIST; 3771 break; 3772 case IPVS_CMD_SET_SERVICE: 3773 ret = ip_vs_edit_service(svc, &usvc); 3774 break; 3775 case IPVS_CMD_DEL_SERVICE: 3776 ret = ip_vs_del_service(svc); 3777 /* do not use svc, it can be freed */ 3778 break; 3779 case IPVS_CMD_NEW_DEST: 3780 ret = ip_vs_add_dest(svc, &udest); 3781 break; 3782 case IPVS_CMD_SET_DEST: 3783 ret = ip_vs_edit_dest(svc, &udest); 3784 break; 3785 case IPVS_CMD_DEL_DEST: 3786 ret = ip_vs_del_dest(svc, &udest); 3787 break; 3788 case IPVS_CMD_ZERO: 3789 ret = ip_vs_zero_service(svc); 3790 break; 3791 default: 3792 ret = -EINVAL; 3793 } 3794 3795 out: 3796 mutex_unlock(&__ip_vs_mutex); 3797 3798 return ret; 3799 } 3800 3801 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) 3802 { 3803 struct sk_buff *msg; 3804 void *reply; 3805 int ret, cmd, reply_cmd; 3806 struct net *net = sock_net(skb->sk); 3807 struct netns_ipvs *ipvs = net_ipvs(net); 3808 3809 cmd = info->genlhdr->cmd; 3810 3811 if (cmd == IPVS_CMD_GET_SERVICE) 3812 reply_cmd = IPVS_CMD_NEW_SERVICE; 3813 else if (cmd == IPVS_CMD_GET_INFO) 3814 reply_cmd = IPVS_CMD_SET_INFO; 3815 else if (cmd == IPVS_CMD_GET_CONFIG) 3816 reply_cmd = IPVS_CMD_SET_CONFIG; 3817 else { 3818 pr_err("unknown 
Generic Netlink command\n"); 3819 return -EINVAL; 3820 } 3821 3822 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3823 if (!msg) 3824 return -ENOMEM; 3825 3826 mutex_lock(&__ip_vs_mutex); 3827 3828 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); 3829 if (reply == NULL) 3830 goto nla_put_failure; 3831 3832 switch (cmd) { 3833 case IPVS_CMD_GET_SERVICE: 3834 { 3835 struct ip_vs_service *svc; 3836 3837 svc = ip_vs_genl_find_service(ipvs, 3838 info->attrs[IPVS_CMD_ATTR_SERVICE]); 3839 if (IS_ERR(svc)) { 3840 ret = PTR_ERR(svc); 3841 goto out_err; 3842 } else if (svc) { 3843 ret = ip_vs_genl_fill_service(msg, svc); 3844 if (ret) 3845 goto nla_put_failure; 3846 } else { 3847 ret = -ESRCH; 3848 goto out_err; 3849 } 3850 3851 break; 3852 } 3853 3854 case IPVS_CMD_GET_CONFIG: 3855 { 3856 struct ip_vs_timeout_user t; 3857 3858 __ip_vs_get_timeouts(ipvs, &t); 3859 #ifdef CONFIG_IP_VS_PROTO_TCP 3860 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, 3861 t.tcp_timeout) || 3862 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, 3863 t.tcp_fin_timeout)) 3864 goto nla_put_failure; 3865 #endif 3866 #ifdef CONFIG_IP_VS_PROTO_UDP 3867 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout)) 3868 goto nla_put_failure; 3869 #endif 3870 3871 break; 3872 } 3873 3874 case IPVS_CMD_GET_INFO: 3875 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION, 3876 IP_VS_VERSION_CODE) || 3877 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, 3878 ip_vs_conn_tab_size)) 3879 goto nla_put_failure; 3880 break; 3881 } 3882 3883 genlmsg_end(msg, reply); 3884 ret = genlmsg_reply(msg, info); 3885 goto out; 3886 3887 nla_put_failure: 3888 pr_err("not enough space in Netlink message\n"); 3889 ret = -EMSGSIZE; 3890 3891 out_err: 3892 nlmsg_free(msg); 3893 out: 3894 mutex_unlock(&__ip_vs_mutex); 3895 3896 return ret; 3897 } 3898 3899 3900 static const struct genl_small_ops ip_vs_genl_ops[] = { 3901 { 3902 .cmd = IPVS_CMD_NEW_SERVICE, 3903 .validate = GENL_DONT_VALIDATE_STRICT | 
GENL_DONT_VALIDATE_DUMP, 3904 .flags = GENL_ADMIN_PERM, 3905 .doit = ip_vs_genl_set_cmd, 3906 }, 3907 { 3908 .cmd = IPVS_CMD_SET_SERVICE, 3909 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3910 .flags = GENL_ADMIN_PERM, 3911 .doit = ip_vs_genl_set_cmd, 3912 }, 3913 { 3914 .cmd = IPVS_CMD_DEL_SERVICE, 3915 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3916 .flags = GENL_ADMIN_PERM, 3917 .doit = ip_vs_genl_set_cmd, 3918 }, 3919 { 3920 .cmd = IPVS_CMD_GET_SERVICE, 3921 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3922 .flags = GENL_ADMIN_PERM, 3923 .doit = ip_vs_genl_get_cmd, 3924 .dumpit = ip_vs_genl_dump_services, 3925 }, 3926 { 3927 .cmd = IPVS_CMD_NEW_DEST, 3928 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3929 .flags = GENL_ADMIN_PERM, 3930 .doit = ip_vs_genl_set_cmd, 3931 }, 3932 { 3933 .cmd = IPVS_CMD_SET_DEST, 3934 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3935 .flags = GENL_ADMIN_PERM, 3936 .doit = ip_vs_genl_set_cmd, 3937 }, 3938 { 3939 .cmd = IPVS_CMD_DEL_DEST, 3940 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3941 .flags = GENL_ADMIN_PERM, 3942 .doit = ip_vs_genl_set_cmd, 3943 }, 3944 { 3945 .cmd = IPVS_CMD_GET_DEST, 3946 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3947 .flags = GENL_ADMIN_PERM, 3948 .dumpit = ip_vs_genl_dump_dests, 3949 }, 3950 { 3951 .cmd = IPVS_CMD_NEW_DAEMON, 3952 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3953 .flags = GENL_ADMIN_PERM, 3954 .doit = ip_vs_genl_set_daemon, 3955 }, 3956 { 3957 .cmd = IPVS_CMD_DEL_DAEMON, 3958 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3959 .flags = GENL_ADMIN_PERM, 3960 .doit = ip_vs_genl_set_daemon, 3961 }, 3962 { 3963 .cmd = IPVS_CMD_GET_DAEMON, 3964 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3965 .flags = GENL_ADMIN_PERM, 3966 .dumpit = ip_vs_genl_dump_daemons, 3967 }, 3968 { 3969 .cmd = 
IPVS_CMD_SET_CONFIG, 3970 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3971 .flags = GENL_ADMIN_PERM, 3972 .doit = ip_vs_genl_set_cmd, 3973 }, 3974 { 3975 .cmd = IPVS_CMD_GET_CONFIG, 3976 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3977 .flags = GENL_ADMIN_PERM, 3978 .doit = ip_vs_genl_get_cmd, 3979 }, 3980 { 3981 .cmd = IPVS_CMD_GET_INFO, 3982 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3983 .flags = GENL_ADMIN_PERM, 3984 .doit = ip_vs_genl_get_cmd, 3985 }, 3986 { 3987 .cmd = IPVS_CMD_ZERO, 3988 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3989 .flags = GENL_ADMIN_PERM, 3990 .doit = ip_vs_genl_set_cmd, 3991 }, 3992 { 3993 .cmd = IPVS_CMD_FLUSH, 3994 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3995 .flags = GENL_ADMIN_PERM, 3996 .doit = ip_vs_genl_set_cmd, 3997 }, 3998 }; 3999 4000 static struct genl_family ip_vs_genl_family __ro_after_init = { 4001 .hdrsize = 0, 4002 .name = IPVS_GENL_NAME, 4003 .version = IPVS_GENL_VERSION, 4004 .maxattr = IPVS_CMD_ATTR_MAX, 4005 .policy = ip_vs_cmd_policy, 4006 .netnsok = true, /* Make ipvsadm to work on netns */ 4007 .module = THIS_MODULE, 4008 .small_ops = ip_vs_genl_ops, 4009 .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops), 4010 }; 4011 4012 static int __init ip_vs_genl_register(void) 4013 { 4014 return genl_register_family(&ip_vs_genl_family); 4015 } 4016 4017 static void ip_vs_genl_unregister(void) 4018 { 4019 genl_unregister_family(&ip_vs_genl_family); 4020 } 4021 4022 /* End of Generic Netlink interface definitions */ 4023 4024 /* 4025 * per netns intit/exit func. 
4026 */ 4027 #ifdef CONFIG_SYSCTL 4028 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) 4029 { 4030 struct net *net = ipvs->net; 4031 int idx; 4032 struct ctl_table *tbl; 4033 4034 atomic_set(&ipvs->dropentry, 0); 4035 spin_lock_init(&ipvs->dropentry_lock); 4036 spin_lock_init(&ipvs->droppacket_lock); 4037 spin_lock_init(&ipvs->securetcp_lock); 4038 4039 if (!net_eq(net, &init_net)) { 4040 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); 4041 if (tbl == NULL) 4042 return -ENOMEM; 4043 4044 /* Don't export sysctls to unprivileged users */ 4045 if (net->user_ns != &init_user_ns) 4046 tbl[0].procname = NULL; 4047 } else 4048 tbl = vs_vars; 4049 /* Initialize sysctl defaults */ 4050 for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) { 4051 if (tbl[idx].proc_handler == proc_do_defense_mode) 4052 tbl[idx].extra2 = ipvs; 4053 } 4054 idx = 0; 4055 ipvs->sysctl_amemthresh = 1024; 4056 tbl[idx++].data = &ipvs->sysctl_amemthresh; 4057 ipvs->sysctl_am_droprate = 10; 4058 tbl[idx++].data = &ipvs->sysctl_am_droprate; 4059 tbl[idx++].data = &ipvs->sysctl_drop_entry; 4060 tbl[idx++].data = &ipvs->sysctl_drop_packet; 4061 #ifdef CONFIG_IP_VS_NFCT 4062 tbl[idx++].data = &ipvs->sysctl_conntrack; 4063 #endif 4064 tbl[idx++].data = &ipvs->sysctl_secure_tcp; 4065 ipvs->sysctl_snat_reroute = 1; 4066 tbl[idx++].data = &ipvs->sysctl_snat_reroute; 4067 ipvs->sysctl_sync_ver = 1; 4068 tbl[idx++].data = &ipvs->sysctl_sync_ver; 4069 ipvs->sysctl_sync_ports = 1; 4070 tbl[idx++].data = &ipvs->sysctl_sync_ports; 4071 tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; 4072 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; 4073 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; 4074 ipvs->sysctl_sync_sock_size = 0; 4075 tbl[idx++].data = &ipvs->sysctl_sync_sock_size; 4076 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 4077 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 4078 tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; 4079 tbl[idx++].data = 
&ipvs->sysctl_sloppy_sctp; 4080 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 4081 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; 4082 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 4083 tbl[idx].data = &ipvs->sysctl_sync_threshold; 4084 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 4085 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; 4086 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; 4087 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); 4088 tbl[idx++].data = &ipvs->sysctl_sync_retries; 4089 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 4090 ipvs->sysctl_pmtu_disc = 1; 4091 tbl[idx++].data = &ipvs->sysctl_pmtu_disc; 4092 tbl[idx++].data = &ipvs->sysctl_backup_only; 4093 ipvs->sysctl_conn_reuse_mode = 1; 4094 tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; 4095 tbl[idx++].data = &ipvs->sysctl_schedule_icmp; 4096 tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; 4097 ipvs->sysctl_run_estimation = 1; 4098 tbl[idx++].data = &ipvs->sysctl_run_estimation; 4099 #ifdef CONFIG_IP_VS_DEBUG 4100 /* Global sysctls must be ro in non-init netns */ 4101 if (!net_eq(net, &init_net)) 4102 tbl[idx++].mode = 0444; 4103 #endif 4104 4105 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); 4106 if (ipvs->sysctl_hdr == NULL) { 4107 if (!net_eq(net, &init_net)) 4108 kfree(tbl); 4109 return -ENOMEM; 4110 } 4111 ip_vs_start_estimator(ipvs, &ipvs->tot_stats); 4112 ipvs->sysctl_tbl = tbl; 4113 /* Schedule defense work */ 4114 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 4115 queue_delayed_work(system_long_wq, &ipvs->defense_work, 4116 DEFENSE_TIMER_PERIOD); 4117 4118 /* Init delayed work for expiring no dest conn */ 4119 INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work, 4120 expire_nodest_conn_handler); 4121 4122 return 0; 4123 } 4124 4125 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) 4126 { 4127 struct net *net = ipvs->net; 4128 4129 
cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work); 4130 cancel_delayed_work_sync(&ipvs->defense_work); 4131 cancel_work_sync(&ipvs->defense_work.work); 4132 unregister_net_sysctl_table(ipvs->sysctl_hdr); 4133 ip_vs_stop_estimator(ipvs, &ipvs->tot_stats); 4134 4135 if (!net_eq(net, &init_net)) 4136 kfree(ipvs->sysctl_tbl); 4137 } 4138 4139 #else 4140 4141 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; } 4142 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { } 4143 4144 #endif 4145 4146 static struct notifier_block ip_vs_dst_notifier = { 4147 .notifier_call = ip_vs_dst_event, 4148 #ifdef CONFIG_IP_VS_IPV6 4149 .priority = ADDRCONF_NOTIFY_PRIORITY + 5, 4150 #endif 4151 }; 4152 4153 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) 4154 { 4155 int i, idx; 4156 4157 /* Initialize rs_table */ 4158 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 4159 INIT_HLIST_HEAD(&ipvs->rs_table[idx]); 4160 4161 INIT_LIST_HEAD(&ipvs->dest_trash); 4162 spin_lock_init(&ipvs->dest_trash_lock); 4163 timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0); 4164 atomic_set(&ipvs->ftpsvc_counter, 0); 4165 atomic_set(&ipvs->nullsvc_counter, 0); 4166 atomic_set(&ipvs->conn_out_counter, 0); 4167 4168 /* procfs stats */ 4169 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 4170 if (!ipvs->tot_stats.cpustats) 4171 return -ENOMEM; 4172 4173 for_each_possible_cpu(i) { 4174 struct ip_vs_cpu_stats *ipvs_tot_stats; 4175 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); 4176 u64_stats_init(&ipvs_tot_stats->syncp); 4177 } 4178 4179 spin_lock_init(&ipvs->tot_stats.lock); 4180 4181 #ifdef CONFIG_PROC_FS 4182 if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net, 4183 &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter))) 4184 goto err_vs; 4185 if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, 4186 ip_vs_stats_show, NULL)) 4187 goto err_stats; 4188 if 
(!proc_create_net_single("ip_vs_stats_percpu", 0, 4189 ipvs->net->proc_net, 4190 ip_vs_stats_percpu_show, NULL)) 4191 goto err_percpu; 4192 #endif 4193 4194 if (ip_vs_control_net_init_sysctl(ipvs)) 4195 goto err; 4196 4197 return 0; 4198 4199 err: 4200 #ifdef CONFIG_PROC_FS 4201 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); 4202 4203 err_percpu: 4204 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); 4205 4206 err_stats: 4207 remove_proc_entry("ip_vs", ipvs->net->proc_net); 4208 4209 err_vs: 4210 #endif 4211 free_percpu(ipvs->tot_stats.cpustats); 4212 return -ENOMEM; 4213 } 4214 4215 void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) 4216 { 4217 ip_vs_trash_cleanup(ipvs); 4218 ip_vs_control_net_cleanup_sysctl(ipvs); 4219 #ifdef CONFIG_PROC_FS 4220 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); 4221 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); 4222 remove_proc_entry("ip_vs", ipvs->net->proc_net); 4223 #endif 4224 free_percpu(ipvs->tot_stats.cpustats); 4225 } 4226 4227 int __init ip_vs_register_nl_ioctl(void) 4228 { 4229 int ret; 4230 4231 ret = nf_register_sockopt(&ip_vs_sockopts); 4232 if (ret) { 4233 pr_err("cannot register sockopt.\n"); 4234 goto err_sock; 4235 } 4236 4237 ret = ip_vs_genl_register(); 4238 if (ret) { 4239 pr_err("cannot register Generic Netlink interface.\n"); 4240 goto err_genl; 4241 } 4242 return 0; 4243 4244 err_genl: 4245 nf_unregister_sockopt(&ip_vs_sockopts); 4246 err_sock: 4247 return ret; 4248 } 4249 4250 void ip_vs_unregister_nl_ioctl(void) 4251 { 4252 ip_vs_genl_unregister(); 4253 nf_unregister_sockopt(&ip_vs_sockopts); 4254 } 4255 4256 int __init ip_vs_control_init(void) 4257 { 4258 int idx; 4259 int ret; 4260 4261 EnterFunction(2); 4262 4263 /* Initialize svc_table, ip_vs_svc_fwm_table */ 4264 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 4265 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); 4266 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); 4267 } 4268 4269 smp_wmb(); /* Do we 
really need it now ? */ 4270 4271 ret = register_netdevice_notifier(&ip_vs_dst_notifier); 4272 if (ret < 0) 4273 return ret; 4274 4275 LeaveFunction(2); 4276 return 0; 4277 } 4278 4279 4280 void ip_vs_control_cleanup(void) 4281 { 4282 EnterFunction(2); 4283 unregister_netdevice_notifier(&ip_vs_dst_notifier); 4284 LeaveFunction(2); 4285 } 4286