1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * IPVS An implementation of the IP virtual server support for the 4 * LINUX operating system. IPVS is now implemented as a module 5 * over the NetFilter framework. IPVS can be used to build a 6 * high-performance and highly available server based on a 7 * cluster of servers. 8 * 9 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 10 * Peter Kese <peter.kese@ijs.si> 11 * Julian Anastasov <ja@ssi.bg> 12 * 13 * Changes: 14 */ 15 16 #define KMSG_COMPONENT "IPVS" 17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 18 19 #include <linux/module.h> 20 #include <linux/init.h> 21 #include <linux/types.h> 22 #include <linux/capability.h> 23 #include <linux/fs.h> 24 #include <linux/sysctl.h> 25 #include <linux/proc_fs.h> 26 #include <linux/workqueue.h> 27 #include <linux/swap.h> 28 #include <linux/seq_file.h> 29 #include <linux/slab.h> 30 31 #include <linux/netfilter.h> 32 #include <linux/netfilter_ipv4.h> 33 #include <linux/mutex.h> 34 35 #include <net/net_namespace.h> 36 #include <linux/nsproxy.h> 37 #include <net/ip.h> 38 #ifdef CONFIG_IP_VS_IPV6 39 #include <net/ipv6.h> 40 #include <net/ip6_route.h> 41 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 42 #endif 43 #include <net/route.h> 44 #include <net/sock.h> 45 #include <net/genetlink.h> 46 47 #include <linux/uaccess.h> 48 49 #include <net/ip_vs.h> 50 51 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 52 static DEFINE_MUTEX(__ip_vs_mutex); 53 54 /* sysctl variables */ 55 56 #ifdef CONFIG_IP_VS_DEBUG 57 static int sysctl_ip_vs_debug_level = 0; 58 59 int ip_vs_get_debug_level(void) 60 { 61 return sysctl_ip_vs_debug_level; 62 } 63 #endif 64 65 66 /* Protos */ 67 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); 68 69 70 #ifdef CONFIG_IP_VS_IPV6 71 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? 
*/ 72 static bool __ip_vs_addr_is_local_v6(struct net *net, 73 const struct in6_addr *addr) 74 { 75 struct flowi6 fl6 = { 76 .daddr = *addr, 77 }; 78 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); 79 bool is_local; 80 81 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); 82 83 dst_release(dst); 84 return is_local; 85 } 86 #endif 87 88 #ifdef CONFIG_SYSCTL 89 /* 90 * update_defense_level is called from keventd and from sysctl, 91 * so it needs to protect itself from softirqs 92 */ 93 static void update_defense_level(struct netns_ipvs *ipvs) 94 { 95 struct sysinfo i; 96 int availmem; 97 int nomem; 98 int to_change = -1; 99 100 /* we only count free and buffered memory (in pages) */ 101 si_meminfo(&i); 102 availmem = i.freeram + i.bufferram; 103 /* however in linux 2.5 the i.bufferram is total page cache size, 104 we need adjust it */ 105 /* si_swapinfo(&i); */ 106 /* availmem = availmem - (i.totalswap - i.freeswap); */ 107 108 nomem = (availmem < ipvs->sysctl_amemthresh); 109 110 local_bh_disable(); 111 112 /* drop_entry */ 113 spin_lock(&ipvs->dropentry_lock); 114 switch (ipvs->sysctl_drop_entry) { 115 case 0: 116 atomic_set(&ipvs->dropentry, 0); 117 break; 118 case 1: 119 if (nomem) { 120 atomic_set(&ipvs->dropentry, 1); 121 ipvs->sysctl_drop_entry = 2; 122 } else { 123 atomic_set(&ipvs->dropentry, 0); 124 } 125 break; 126 case 2: 127 if (nomem) { 128 atomic_set(&ipvs->dropentry, 1); 129 } else { 130 atomic_set(&ipvs->dropentry, 0); 131 ipvs->sysctl_drop_entry = 1; 132 } 133 break; 134 case 3: 135 atomic_set(&ipvs->dropentry, 1); 136 break; 137 } 138 spin_unlock(&ipvs->dropentry_lock); 139 140 /* drop_packet */ 141 spin_lock(&ipvs->droppacket_lock); 142 switch (ipvs->sysctl_drop_packet) { 143 case 0: 144 ipvs->drop_rate = 0; 145 break; 146 case 1: 147 if (nomem) { 148 ipvs->drop_rate = ipvs->drop_counter 149 = ipvs->sysctl_amemthresh / 150 (ipvs->sysctl_amemthresh-availmem); 151 ipvs->sysctl_drop_packet = 2; 152 } else { 153 
ipvs->drop_rate = 0; 154 } 155 break; 156 case 2: 157 if (nomem) { 158 ipvs->drop_rate = ipvs->drop_counter 159 = ipvs->sysctl_amemthresh / 160 (ipvs->sysctl_amemthresh-availmem); 161 } else { 162 ipvs->drop_rate = 0; 163 ipvs->sysctl_drop_packet = 1; 164 } 165 break; 166 case 3: 167 ipvs->drop_rate = ipvs->sysctl_am_droprate; 168 break; 169 } 170 spin_unlock(&ipvs->droppacket_lock); 171 172 /* secure_tcp */ 173 spin_lock(&ipvs->securetcp_lock); 174 switch (ipvs->sysctl_secure_tcp) { 175 case 0: 176 if (ipvs->old_secure_tcp >= 2) 177 to_change = 0; 178 break; 179 case 1: 180 if (nomem) { 181 if (ipvs->old_secure_tcp < 2) 182 to_change = 1; 183 ipvs->sysctl_secure_tcp = 2; 184 } else { 185 if (ipvs->old_secure_tcp >= 2) 186 to_change = 0; 187 } 188 break; 189 case 2: 190 if (nomem) { 191 if (ipvs->old_secure_tcp < 2) 192 to_change = 1; 193 } else { 194 if (ipvs->old_secure_tcp >= 2) 195 to_change = 0; 196 ipvs->sysctl_secure_tcp = 1; 197 } 198 break; 199 case 3: 200 if (ipvs->old_secure_tcp < 2) 201 to_change = 1; 202 break; 203 } 204 ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; 205 if (to_change >= 0) 206 ip_vs_protocol_timeout_change(ipvs, 207 ipvs->sysctl_secure_tcp > 1); 208 spin_unlock(&ipvs->securetcp_lock); 209 210 local_bh_enable(); 211 } 212 213 214 /* 215 * Timer for checking the defense 216 */ 217 #define DEFENSE_TIMER_PERIOD 1*HZ 218 219 static void defense_work_handler(struct work_struct *work) 220 { 221 struct netns_ipvs *ipvs = 222 container_of(work, struct netns_ipvs, defense_work.work); 223 224 update_defense_level(ipvs); 225 if (atomic_read(&ipvs->dropentry)) 226 ip_vs_random_dropentry(ipvs); 227 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 228 } 229 #endif 230 231 int 232 ip_vs_use_count_inc(void) 233 { 234 return try_module_get(THIS_MODULE); 235 } 236 237 void 238 ip_vs_use_count_dec(void) 239 { 240 module_put(THIS_MODULE); 241 } 242 243 244 /* 245 * Hash table: for virtual service lookups 246 */ 247 #define 
IP_VS_SVC_TAB_BITS 8 248 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 249 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 250 251 /* the service table hashed by <protocol, addr, port> */ 252 static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 253 /* the service table hashed by fwmark */ 254 static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 255 256 257 /* 258 * Returns hash value for virtual service 259 */ 260 static inline unsigned int 261 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, 262 const union nf_inet_addr *addr, __be16 port) 263 { 264 unsigned int porth = ntohs(port); 265 __be32 addr_fold = addr->ip; 266 __u32 ahash; 267 268 #ifdef CONFIG_IP_VS_IPV6 269 if (af == AF_INET6) 270 addr_fold = addr->ip6[0]^addr->ip6[1]^ 271 addr->ip6[2]^addr->ip6[3]; 272 #endif 273 ahash = ntohl(addr_fold); 274 ahash ^= ((size_t) ipvs >> 8); 275 276 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & 277 IP_VS_SVC_TAB_MASK; 278 } 279 280 /* 281 * Returns hash value of fwmark for virtual service lookup 282 */ 283 static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) 284 { 285 return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 286 } 287 288 /* 289 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 290 * or in the ip_vs_svc_fwm_table by fwmark. 291 * Should be called with locked tables. 
292 */ 293 static int ip_vs_svc_hash(struct ip_vs_service *svc) 294 { 295 unsigned int hash; 296 297 if (svc->flags & IP_VS_SVC_F_HASHED) { 298 pr_err("%s(): request for already hashed, called from %pS\n", 299 __func__, __builtin_return_address(0)); 300 return 0; 301 } 302 303 if (svc->fwmark == 0) { 304 /* 305 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 306 */ 307 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, 308 &svc->addr, svc->port); 309 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); 310 } else { 311 /* 312 * Hash it by fwmark in svc_fwm_table 313 */ 314 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); 315 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 316 } 317 318 svc->flags |= IP_VS_SVC_F_HASHED; 319 /* increase its refcnt because it is referenced by the svc table */ 320 atomic_inc(&svc->refcnt); 321 return 1; 322 } 323 324 325 /* 326 * Unhashes a service from svc_table / svc_fwm_table. 327 * Should be called with locked tables. 328 */ 329 static int ip_vs_svc_unhash(struct ip_vs_service *svc) 330 { 331 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 332 pr_err("%s(): request for unhash flagged, called from %pS\n", 333 __func__, __builtin_return_address(0)); 334 return 0; 335 } 336 337 if (svc->fwmark == 0) { 338 /* Remove it from the svc_table table */ 339 hlist_del_rcu(&svc->s_list); 340 } else { 341 /* Remove it from the svc_fwm_table table */ 342 hlist_del_rcu(&svc->f_list); 343 } 344 345 svc->flags &= ~IP_VS_SVC_F_HASHED; 346 atomic_dec(&svc->refcnt); 347 return 1; 348 } 349 350 351 /* 352 * Get service by {netns, proto,addr,port} in the service table. 
353 */ 354 static inline struct ip_vs_service * 355 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, 356 const union nf_inet_addr *vaddr, __be16 vport) 357 { 358 unsigned int hash; 359 struct ip_vs_service *svc; 360 361 /* Check for "full" addressed entries */ 362 hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); 363 364 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { 365 if ((svc->af == af) 366 && ip_vs_addr_equal(af, &svc->addr, vaddr) 367 && (svc->port == vport) 368 && (svc->protocol == protocol) 369 && (svc->ipvs == ipvs)) { 370 /* HIT */ 371 return svc; 372 } 373 } 374 375 return NULL; 376 } 377 378 379 /* 380 * Get service by {fwmark} in the service table. 381 */ 382 static inline struct ip_vs_service * 383 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) 384 { 385 unsigned int hash; 386 struct ip_vs_service *svc; 387 388 /* Check for fwmark addressed entries */ 389 hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); 390 391 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { 392 if (svc->fwmark == fwmark && svc->af == af 393 && (svc->ipvs == ipvs)) { 394 /* HIT */ 395 return svc; 396 } 397 } 398 399 return NULL; 400 } 401 402 /* Find service, called under RCU lock */ 403 struct ip_vs_service * 404 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, 405 const union nf_inet_addr *vaddr, __be16 vport) 406 { 407 struct ip_vs_service *svc; 408 409 /* 410 * Check the table hashed by fwmark first 411 */ 412 if (fwmark) { 413 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); 414 if (svc) 415 goto out; 416 } 417 418 /* 419 * Check the table hashed by <protocol,addr,port> 420 * for "full" addressed entries 421 */ 422 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); 423 424 if (!svc && protocol == IPPROTO_TCP && 425 atomic_read(&ipvs->ftpsvc_counter) && 426 (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) { 427 /* 428 * Check 
if ftp service entry exists, the packet 429 * might belong to FTP data connections. 430 */ 431 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); 432 } 433 434 if (svc == NULL 435 && atomic_read(&ipvs->nullsvc_counter)) { 436 /* 437 * Check if the catch-all port (port zero) exists 438 */ 439 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); 440 } 441 442 out: 443 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 444 fwmark, ip_vs_proto_name(protocol), 445 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 446 svc ? "hit" : "not hit"); 447 448 return svc; 449 } 450 451 452 static inline void 453 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 454 { 455 atomic_inc(&svc->refcnt); 456 rcu_assign_pointer(dest->svc, svc); 457 } 458 459 static void ip_vs_service_free(struct ip_vs_service *svc) 460 { 461 free_percpu(svc->stats.cpustats); 462 kfree(svc); 463 } 464 465 static void ip_vs_service_rcu_free(struct rcu_head *head) 466 { 467 struct ip_vs_service *svc; 468 469 svc = container_of(head, struct ip_vs_service, rcu_head); 470 ip_vs_service_free(svc); 471 } 472 473 static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) 474 { 475 if (atomic_dec_and_test(&svc->refcnt)) { 476 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 477 svc->fwmark, 478 IP_VS_DBG_ADDR(svc->af, &svc->addr), 479 ntohs(svc->port)); 480 if (do_delay) 481 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); 482 else 483 ip_vs_service_free(svc); 484 } 485 } 486 487 488 /* 489 * Returns hash value for real service 490 */ 491 static inline unsigned int ip_vs_rs_hashkey(int af, 492 const union nf_inet_addr *addr, 493 __be16 port) 494 { 495 unsigned int porth = ntohs(port); 496 __be32 addr_fold = addr->ip; 497 498 #ifdef CONFIG_IP_VS_IPV6 499 if (af == AF_INET6) 500 addr_fold = addr->ip6[0]^addr->ip6[1]^ 501 addr->ip6[2]^addr->ip6[3]; 502 #endif 503 504 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) 505 & IP_VS_RTAB_MASK; 506 } 507 508 /* Hash 
ip_vs_dest in rs_table by <proto,addr,port>. */ 509 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 510 { 511 unsigned int hash; 512 __be16 port; 513 514 if (dest->in_rs_table) 515 return; 516 517 switch (IP_VS_DFWD_METHOD(dest)) { 518 case IP_VS_CONN_F_MASQ: 519 port = dest->port; 520 break; 521 case IP_VS_CONN_F_TUNNEL: 522 switch (dest->tun_type) { 523 case IP_VS_CONN_F_TUNNEL_TYPE_GUE: 524 port = dest->tun_port; 525 break; 526 case IP_VS_CONN_F_TUNNEL_TYPE_IPIP: 527 case IP_VS_CONN_F_TUNNEL_TYPE_GRE: 528 port = 0; 529 break; 530 default: 531 return; 532 } 533 break; 534 default: 535 return; 536 } 537 538 /* 539 * Hash by proto,addr,port, 540 * which are the parameters of the real service. 541 */ 542 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port); 543 544 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); 545 dest->in_rs_table = 1; 546 } 547 548 /* Unhash ip_vs_dest from rs_table. */ 549 static void ip_vs_rs_unhash(struct ip_vs_dest *dest) 550 { 551 /* 552 * Remove it from the rs_table table. 553 */ 554 if (dest->in_rs_table) { 555 hlist_del_rcu(&dest->d_list); 556 dest->in_rs_table = 0; 557 } 558 } 559 560 /* Check if real service by <proto,addr,port> is present */ 561 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, 562 const union nf_inet_addr *daddr, __be16 dport) 563 { 564 unsigned int hash; 565 struct ip_vs_dest *dest; 566 567 /* Check for "full" addressed entries */ 568 hash = ip_vs_rs_hashkey(af, daddr, dport); 569 570 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 571 if (dest->port == dport && 572 dest->af == af && 573 ip_vs_addr_equal(af, &dest->addr, daddr) && 574 (dest->protocol == protocol || dest->vfwmark) && 575 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 576 /* HIT */ 577 return true; 578 } 579 } 580 581 return false; 582 } 583 584 /* Find real service record by <proto,addr,port>. 
585 * In case of multiple records with the same <proto,addr,port>, only 586 * the first found record is returned. 587 * 588 * To be called under RCU lock. 589 */ 590 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, 591 __u16 protocol, 592 const union nf_inet_addr *daddr, 593 __be16 dport) 594 { 595 unsigned int hash; 596 struct ip_vs_dest *dest; 597 598 /* Check for "full" addressed entries */ 599 hash = ip_vs_rs_hashkey(af, daddr, dport); 600 601 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 602 if (dest->port == dport && 603 dest->af == af && 604 ip_vs_addr_equal(af, &dest->addr, daddr) && 605 (dest->protocol == protocol || dest->vfwmark) && 606 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 607 /* HIT */ 608 return dest; 609 } 610 } 611 612 return NULL; 613 } 614 615 /* Find real service record by <af,addr,tun_port>. 616 * In case of multiple records with the same <af,addr,tun_port>, only 617 * the first found record is returned. 618 * 619 * To be called under RCU lock. 620 */ 621 struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af, 622 const union nf_inet_addr *daddr, 623 __be16 tun_port) 624 { 625 struct ip_vs_dest *dest; 626 unsigned int hash; 627 628 /* Check for "full" addressed entries */ 629 hash = ip_vs_rs_hashkey(af, daddr, tun_port); 630 631 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 632 if (dest->tun_port == tun_port && 633 dest->af == af && 634 ip_vs_addr_equal(af, &dest->addr, daddr) && 635 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) { 636 /* HIT */ 637 return dest; 638 } 639 } 640 641 return NULL; 642 } 643 644 /* Lookup destination by {addr,port} in the given service 645 * Called under RCU lock. 
646 */ 647 static struct ip_vs_dest * 648 ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, 649 const union nf_inet_addr *daddr, __be16 dport) 650 { 651 struct ip_vs_dest *dest; 652 653 /* 654 * Find the destination for the given service 655 */ 656 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 657 if ((dest->af == dest_af) && 658 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 659 (dest->port == dport)) { 660 /* HIT */ 661 return dest; 662 } 663 } 664 665 return NULL; 666 } 667 668 /* 669 * Find destination by {daddr,dport,vaddr,protocol} 670 * Created to be used in ip_vs_process_message() in 671 * the backup synchronization daemon. It finds the 672 * destination to be bound to the received connection 673 * on the backup. 674 * Called under RCU lock, no refcnt is returned. 675 */ 676 struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af, 677 const union nf_inet_addr *daddr, 678 __be16 dport, 679 const union nf_inet_addr *vaddr, 680 __be16 vport, __u16 protocol, __u32 fwmark, 681 __u32 flags) 682 { 683 struct ip_vs_dest *dest; 684 struct ip_vs_service *svc; 685 __be16 port = dport; 686 687 svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport); 688 if (!svc) 689 return NULL; 690 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 691 port = 0; 692 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port); 693 if (!dest) 694 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport); 695 return dest; 696 } 697 698 void ip_vs_dest_dst_rcu_free(struct rcu_head *head) 699 { 700 struct ip_vs_dest_dst *dest_dst = container_of(head, 701 struct ip_vs_dest_dst, 702 rcu_head); 703 704 dst_release(dest_dst->dst_cache); 705 kfree(dest_dst); 706 } 707 708 /* Release dest_dst and dst_cache for dest in user context */ 709 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) 710 { 711 struct ip_vs_dest_dst *old; 712 713 old = rcu_dereference_protected(dest->dest_dst, 1); 714 if (old) { 715 
RCU_INIT_POINTER(dest->dest_dst, NULL); 716 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 717 } 718 } 719 720 /* 721 * Lookup dest by {svc,addr,port} in the destination trash. 722 * The destination trash is used to hold the destinations that are removed 723 * from the service table but are still referenced by some conn entries. 724 * The reason to add the destination trash is when the dest is temporary 725 * down (either by administrator or by monitor program), the dest can be 726 * picked back from the trash, the remaining connections to the dest can 727 * continue, and the counting information of the dest is also useful for 728 * scheduling. 729 */ 730 static struct ip_vs_dest * 731 ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, 732 const union nf_inet_addr *daddr, __be16 dport) 733 { 734 struct ip_vs_dest *dest; 735 struct netns_ipvs *ipvs = svc->ipvs; 736 737 /* 738 * Find the destination in trash 739 */ 740 spin_lock_bh(&ipvs->dest_trash_lock); 741 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 742 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 743 "dest->refcnt=%d\n", 744 dest->vfwmark, 745 IP_VS_DBG_ADDR(dest->af, &dest->addr), 746 ntohs(dest->port), 747 refcount_read(&dest->refcnt)); 748 if (dest->af == dest_af && 749 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 750 dest->port == dport && 751 dest->vfwmark == svc->fwmark && 752 dest->protocol == svc->protocol && 753 (svc->fwmark || 754 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 755 dest->vport == svc->port))) { 756 /* HIT */ 757 list_del(&dest->t_list); 758 goto out; 759 } 760 } 761 762 dest = NULL; 763 764 out: 765 spin_unlock_bh(&ipvs->dest_trash_lock); 766 767 return dest; 768 } 769 770 static void ip_vs_dest_free(struct ip_vs_dest *dest) 771 { 772 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); 773 774 __ip_vs_dst_cache_reset(dest); 775 __ip_vs_svc_put(svc, false); 776 free_percpu(dest->stats.cpustats); 777 
ip_vs_dest_put_and_free(dest); 778 } 779 780 /* 781 * Clean up all the destinations in the trash 782 * Called by the ip_vs_control_cleanup() 783 * 784 * When the ip_vs_control_clearup is activated by ipvs module exit, 785 * the service tables must have been flushed and all the connections 786 * are expired, and the refcnt of each destination in the trash must 787 * be 1, so we simply release them here. 788 */ 789 static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) 790 { 791 struct ip_vs_dest *dest, *nxt; 792 793 del_timer_sync(&ipvs->dest_trash_timer); 794 /* No need to use dest_trash_lock */ 795 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { 796 list_del(&dest->t_list); 797 ip_vs_dest_free(dest); 798 } 799 } 800 801 static void 802 ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) 803 { 804 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c 805 806 spin_lock_bh(&src->lock); 807 808 IP_VS_SHOW_STATS_COUNTER(conns); 809 IP_VS_SHOW_STATS_COUNTER(inpkts); 810 IP_VS_SHOW_STATS_COUNTER(outpkts); 811 IP_VS_SHOW_STATS_COUNTER(inbytes); 812 IP_VS_SHOW_STATS_COUNTER(outbytes); 813 814 ip_vs_read_estimator(dst, src); 815 816 spin_unlock_bh(&src->lock); 817 } 818 819 static void 820 ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) 821 { 822 dst->conns = (u32)src->conns; 823 dst->inpkts = (u32)src->inpkts; 824 dst->outpkts = (u32)src->outpkts; 825 dst->inbytes = src->inbytes; 826 dst->outbytes = src->outbytes; 827 dst->cps = (u32)src->cps; 828 dst->inpps = (u32)src->inpps; 829 dst->outpps = (u32)src->outpps; 830 dst->inbps = (u32)src->inbps; 831 dst->outbps = (u32)src->outbps; 832 } 833 834 static void 835 ip_vs_zero_stats(struct ip_vs_stats *stats) 836 { 837 spin_lock_bh(&stats->lock); 838 839 /* get current counters as zero point, rates are zeroed */ 840 841 #define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c 842 843 IP_VS_ZERO_STATS_COUNTER(conns); 844 
IP_VS_ZERO_STATS_COUNTER(inpkts); 845 IP_VS_ZERO_STATS_COUNTER(outpkts); 846 IP_VS_ZERO_STATS_COUNTER(inbytes); 847 IP_VS_ZERO_STATS_COUNTER(outbytes); 848 849 ip_vs_zero_estimator(stats); 850 851 spin_unlock_bh(&stats->lock); 852 } 853 854 /* 855 * Update a destination in the given service 856 */ 857 static void 858 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 859 struct ip_vs_dest_user_kern *udest, int add) 860 { 861 struct netns_ipvs *ipvs = svc->ipvs; 862 struct ip_vs_service *old_svc; 863 struct ip_vs_scheduler *sched; 864 int conn_flags; 865 866 /* We cannot modify an address and change the address family */ 867 BUG_ON(!add && udest->af != dest->af); 868 869 if (add && udest->af != svc->af) 870 ipvs->mixed_address_family_dests++; 871 872 /* keep the last_weight with latest non-0 weight */ 873 if (add || udest->weight != 0) 874 atomic_set(&dest->last_weight, udest->weight); 875 876 /* set the weight and the flags */ 877 atomic_set(&dest->weight, udest->weight); 878 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 879 conn_flags |= IP_VS_CONN_F_INACTIVE; 880 881 /* Need to rehash? */ 882 if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) != 883 IP_VS_DFWD_METHOD(dest) || 884 udest->tun_type != dest->tun_type || 885 udest->tun_port != dest->tun_port) 886 ip_vs_rs_unhash(dest); 887 888 /* set the tunnel info */ 889 dest->tun_type = udest->tun_type; 890 dest->tun_port = udest->tun_port; 891 dest->tun_flags = udest->tun_flags; 892 893 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 894 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 895 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 896 } else { 897 /* FTP-NAT requires conntrack for mangling */ 898 if (svc->port == FTPPORT) 899 ip_vs_register_conntrack(svc); 900 } 901 atomic_set(&dest->conn_flags, conn_flags); 902 /* Put the real service in rs_table if not present. 
*/ 903 ip_vs_rs_hash(ipvs, dest); 904 905 /* bind the service */ 906 old_svc = rcu_dereference_protected(dest->svc, 1); 907 if (!old_svc) { 908 __ip_vs_bind_svc(dest, svc); 909 } else { 910 if (old_svc != svc) { 911 ip_vs_zero_stats(&dest->stats); 912 __ip_vs_bind_svc(dest, svc); 913 __ip_vs_svc_put(old_svc, true); 914 } 915 } 916 917 /* set the dest status flags */ 918 dest->flags |= IP_VS_DEST_F_AVAILABLE; 919 920 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) 921 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 922 dest->u_threshold = udest->u_threshold; 923 dest->l_threshold = udest->l_threshold; 924 925 dest->af = udest->af; 926 927 spin_lock_bh(&dest->dst_lock); 928 __ip_vs_dst_cache_reset(dest); 929 spin_unlock_bh(&dest->dst_lock); 930 931 if (add) { 932 ip_vs_start_estimator(svc->ipvs, &dest->stats); 933 list_add_rcu(&dest->n_list, &svc->destinations); 934 svc->num_dests++; 935 sched = rcu_dereference_protected(svc->scheduler, 1); 936 if (sched && sched->add_dest) 937 sched->add_dest(svc, dest); 938 } else { 939 sched = rcu_dereference_protected(svc->scheduler, 1); 940 if (sched && sched->upd_dest) 941 sched->upd_dest(svc, dest); 942 } 943 } 944 945 946 /* 947 * Create a destination for the given service 948 */ 949 static int 950 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, 951 struct ip_vs_dest **dest_p) 952 { 953 struct ip_vs_dest *dest; 954 unsigned int atype, i; 955 956 EnterFunction(2); 957 958 #ifdef CONFIG_IP_VS_IPV6 959 if (udest->af == AF_INET6) { 960 int ret; 961 962 atype = ipv6_addr_type(&udest->addr.in6); 963 if ((!(atype & IPV6_ADDR_UNICAST) || 964 atype & IPV6_ADDR_LINKLOCAL) && 965 !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) 966 return -EINVAL; 967 968 ret = nf_defrag_ipv6_enable(svc->ipvs->net); 969 if (ret) 970 return ret; 971 } else 972 #endif 973 { 974 atype = inet_addr_type(svc->ipvs->net, udest->addr.ip); 975 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 976 return 
-EINVAL; 977 } 978 979 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); 980 if (dest == NULL) 981 return -ENOMEM; 982 983 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 984 if (!dest->stats.cpustats) 985 goto err_alloc; 986 987 for_each_possible_cpu(i) { 988 struct ip_vs_cpu_stats *ip_vs_dest_stats; 989 ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); 990 u64_stats_init(&ip_vs_dest_stats->syncp); 991 } 992 993 dest->af = udest->af; 994 dest->protocol = svc->protocol; 995 dest->vaddr = svc->addr; 996 dest->vport = svc->port; 997 dest->vfwmark = svc->fwmark; 998 ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr); 999 dest->port = udest->port; 1000 1001 atomic_set(&dest->activeconns, 0); 1002 atomic_set(&dest->inactconns, 0); 1003 atomic_set(&dest->persistconns, 0); 1004 refcount_set(&dest->refcnt, 1); 1005 1006 INIT_HLIST_NODE(&dest->d_list); 1007 spin_lock_init(&dest->dst_lock); 1008 spin_lock_init(&dest->stats.lock); 1009 __ip_vs_update_dest(svc, dest, udest, 1); 1010 1011 *dest_p = dest; 1012 1013 LeaveFunction(2); 1014 return 0; 1015 1016 err_alloc: 1017 kfree(dest); 1018 return -ENOMEM; 1019 } 1020 1021 1022 /* 1023 * Add a destination into an existing service 1024 */ 1025 static int 1026 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1027 { 1028 struct ip_vs_dest *dest; 1029 union nf_inet_addr daddr; 1030 __be16 dport = udest->port; 1031 int ret; 1032 1033 EnterFunction(2); 1034 1035 if (udest->weight < 0) { 1036 pr_err("%s(): server weight less than zero\n", __func__); 1037 return -ERANGE; 1038 } 1039 1040 if (udest->l_threshold > udest->u_threshold) { 1041 pr_err("%s(): lower threshold is higher than upper threshold\n", 1042 __func__); 1043 return -ERANGE; 1044 } 1045 1046 if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { 1047 if (udest->tun_port == 0) { 1048 pr_err("%s(): tunnel port is zero\n", __func__); 1049 return -EINVAL; 1050 } 1051 } 1052 1053 ip_vs_addr_copy(udest->af, &daddr, 
&udest->addr); 1054 1055 /* We use function that requires RCU lock */ 1056 rcu_read_lock(); 1057 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); 1058 rcu_read_unlock(); 1059 1060 if (dest != NULL) { 1061 IP_VS_DBG(1, "%s(): dest already exists\n", __func__); 1062 return -EEXIST; 1063 } 1064 1065 /* 1066 * Check if the dest already exists in the trash and 1067 * is from the same service 1068 */ 1069 dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport); 1070 1071 if (dest != NULL) { 1072 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " 1073 "dest->refcnt=%d, service %u/%s:%u\n", 1074 IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport), 1075 refcount_read(&dest->refcnt), 1076 dest->vfwmark, 1077 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 1078 ntohs(dest->vport)); 1079 1080 __ip_vs_update_dest(svc, dest, udest, 1); 1081 ret = 0; 1082 } else { 1083 /* 1084 * Allocate and initialize the dest structure 1085 */ 1086 ret = ip_vs_new_dest(svc, udest, &dest); 1087 } 1088 LeaveFunction(2); 1089 1090 return ret; 1091 } 1092 1093 1094 /* 1095 * Edit a destination in the given service 1096 */ 1097 static int 1098 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1099 { 1100 struct ip_vs_dest *dest; 1101 union nf_inet_addr daddr; 1102 __be16 dport = udest->port; 1103 1104 EnterFunction(2); 1105 1106 if (udest->weight < 0) { 1107 pr_err("%s(): server weight less than zero\n", __func__); 1108 return -ERANGE; 1109 } 1110 1111 if (udest->l_threshold > udest->u_threshold) { 1112 pr_err("%s(): lower threshold is higher than upper threshold\n", 1113 __func__); 1114 return -ERANGE; 1115 } 1116 1117 if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { 1118 if (udest->tun_port == 0) { 1119 pr_err("%s(): tunnel port is zero\n", __func__); 1120 return -EINVAL; 1121 } 1122 } 1123 1124 ip_vs_addr_copy(udest->af, &daddr, &udest->addr); 1125 1126 /* We use function that requires RCU lock */ 1127 rcu_read_lock(); 1128 dest = ip_vs_lookup_dest(svc, 
udest->af, &daddr, dport); 1129 rcu_read_unlock(); 1130 1131 if (dest == NULL) { 1132 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); 1133 return -ENOENT; 1134 } 1135 1136 __ip_vs_update_dest(svc, dest, udest, 0); 1137 LeaveFunction(2); 1138 1139 return 0; 1140 } 1141 1142 /* 1143 * Delete a destination (must be already unlinked from the service) 1144 */ 1145 static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, 1146 bool cleanup) 1147 { 1148 ip_vs_stop_estimator(ipvs, &dest->stats); 1149 1150 /* 1151 * Remove it from the d-linked list with the real services. 1152 */ 1153 ip_vs_rs_unhash(dest); 1154 1155 spin_lock_bh(&ipvs->dest_trash_lock); 1156 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", 1157 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), 1158 refcount_read(&dest->refcnt)); 1159 if (list_empty(&ipvs->dest_trash) && !cleanup) 1160 mod_timer(&ipvs->dest_trash_timer, 1161 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); 1162 /* dest lives in trash with reference */ 1163 list_add(&dest->t_list, &ipvs->dest_trash); 1164 dest->idle_start = 0; 1165 spin_unlock_bh(&ipvs->dest_trash_lock); 1166 } 1167 1168 1169 /* 1170 * Unlink a destination from the given service 1171 */ 1172 static void __ip_vs_unlink_dest(struct ip_vs_service *svc, 1173 struct ip_vs_dest *dest, 1174 int svcupd) 1175 { 1176 dest->flags &= ~IP_VS_DEST_F_AVAILABLE; 1177 1178 /* 1179 * Remove it from the d-linked destination list. 
1180 */ 1181 list_del_rcu(&dest->n_list); 1182 svc->num_dests--; 1183 1184 if (dest->af != svc->af) 1185 svc->ipvs->mixed_address_family_dests--; 1186 1187 if (svcupd) { 1188 struct ip_vs_scheduler *sched; 1189 1190 sched = rcu_dereference_protected(svc->scheduler, 1); 1191 if (sched && sched->del_dest) 1192 sched->del_dest(svc, dest); 1193 } 1194 } 1195 1196 1197 /* 1198 * Delete a destination server in the given service 1199 */ 1200 static int 1201 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1202 { 1203 struct ip_vs_dest *dest; 1204 __be16 dport = udest->port; 1205 1206 EnterFunction(2); 1207 1208 /* We use function that requires RCU lock */ 1209 rcu_read_lock(); 1210 dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport); 1211 rcu_read_unlock(); 1212 1213 if (dest == NULL) { 1214 IP_VS_DBG(1, "%s(): destination not found!\n", __func__); 1215 return -ENOENT; 1216 } 1217 1218 /* 1219 * Unlink dest from the service 1220 */ 1221 __ip_vs_unlink_dest(svc, dest, 1); 1222 1223 /* 1224 * Delete the destination 1225 */ 1226 __ip_vs_del_dest(svc->ipvs, dest, false); 1227 1228 LeaveFunction(2); 1229 1230 return 0; 1231 } 1232 1233 static void ip_vs_dest_trash_expire(struct timer_list *t) 1234 { 1235 struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer); 1236 struct ip_vs_dest *dest, *next; 1237 unsigned long now = jiffies; 1238 1239 spin_lock(&ipvs->dest_trash_lock); 1240 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { 1241 if (refcount_read(&dest->refcnt) > 1) 1242 continue; 1243 if (dest->idle_start) { 1244 if (time_before(now, dest->idle_start + 1245 IP_VS_DEST_TRASH_PERIOD)) 1246 continue; 1247 } else { 1248 dest->idle_start = max(1UL, now); 1249 continue; 1250 } 1251 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", 1252 dest->vfwmark, 1253 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1254 ntohs(dest->port)); 1255 list_del(&dest->t_list); 1256 ip_vs_dest_free(dest); 1257 } 1258 if 
(!list_empty(&ipvs->dest_trash)) 1259 mod_timer(&ipvs->dest_trash_timer, 1260 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); 1261 spin_unlock(&ipvs->dest_trash_lock); 1262 } 1263 1264 /* 1265 * Add a service into the service hash table 1266 */ 1267 static int 1268 ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, 1269 struct ip_vs_service **svc_p) 1270 { 1271 int ret = 0, i; 1272 struct ip_vs_scheduler *sched = NULL; 1273 struct ip_vs_pe *pe = NULL; 1274 struct ip_vs_service *svc = NULL; 1275 1276 /* increase the module use count */ 1277 if (!ip_vs_use_count_inc()) 1278 return -ENOPROTOOPT; 1279 1280 /* Lookup the scheduler by 'u->sched_name' */ 1281 if (strcmp(u->sched_name, "none")) { 1282 sched = ip_vs_scheduler_get(u->sched_name); 1283 if (!sched) { 1284 pr_info("Scheduler module ip_vs_%s not found\n", 1285 u->sched_name); 1286 ret = -ENOENT; 1287 goto out_err; 1288 } 1289 } 1290 1291 if (u->pe_name && *u->pe_name) { 1292 pe = ip_vs_pe_getbyname(u->pe_name); 1293 if (pe == NULL) { 1294 pr_info("persistence engine module ip_vs_pe_%s " 1295 "not found\n", u->pe_name); 1296 ret = -ENOENT; 1297 goto out_err; 1298 } 1299 } 1300 1301 #ifdef CONFIG_IP_VS_IPV6 1302 if (u->af == AF_INET6) { 1303 __u32 plen = (__force __u32) u->netmask; 1304 1305 if (plen < 1 || plen > 128) { 1306 ret = -EINVAL; 1307 goto out_err; 1308 } 1309 1310 ret = nf_defrag_ipv6_enable(ipvs->net); 1311 if (ret) 1312 goto out_err; 1313 } 1314 #endif 1315 1316 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL); 1317 if (svc == NULL) { 1318 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1319 ret = -ENOMEM; 1320 goto out_err; 1321 } 1322 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 1323 if (!svc->stats.cpustats) { 1324 ret = -ENOMEM; 1325 goto out_err; 1326 } 1327 1328 for_each_possible_cpu(i) { 1329 struct ip_vs_cpu_stats *ip_vs_stats; 1330 ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); 1331 u64_stats_init(&ip_vs_stats->syncp); 1332 } 1333 1334 1335 /* I'm 
the first user of the service */ 1336 atomic_set(&svc->refcnt, 0); 1337 1338 svc->af = u->af; 1339 svc->protocol = u->protocol; 1340 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); 1341 svc->port = u->port; 1342 svc->fwmark = u->fwmark; 1343 svc->flags = u->flags; 1344 svc->timeout = u->timeout * HZ; 1345 svc->netmask = u->netmask; 1346 svc->ipvs = ipvs; 1347 1348 INIT_LIST_HEAD(&svc->destinations); 1349 spin_lock_init(&svc->sched_lock); 1350 spin_lock_init(&svc->stats.lock); 1351 1352 /* Bind the scheduler */ 1353 if (sched) { 1354 ret = ip_vs_bind_scheduler(svc, sched); 1355 if (ret) 1356 goto out_err; 1357 sched = NULL; 1358 } 1359 1360 /* Bind the ct retriever */ 1361 RCU_INIT_POINTER(svc->pe, pe); 1362 pe = NULL; 1363 1364 /* Update the virtual service counters */ 1365 if (svc->port == FTPPORT) 1366 atomic_inc(&ipvs->ftpsvc_counter); 1367 else if (svc->port == 0) 1368 atomic_inc(&ipvs->nullsvc_counter); 1369 if (svc->pe && svc->pe->conn_out) 1370 atomic_inc(&ipvs->conn_out_counter); 1371 1372 ip_vs_start_estimator(ipvs, &svc->stats); 1373 1374 /* Count only IPv4 services for old get/setsockopt interface */ 1375 if (svc->af == AF_INET) 1376 ipvs->num_services++; 1377 1378 /* Hash the service into the service table */ 1379 ip_vs_svc_hash(svc); 1380 1381 *svc_p = svc; 1382 /* Now there is a service - full throttle */ 1383 ipvs->enable = 1; 1384 return 0; 1385 1386 1387 out_err: 1388 if (svc != NULL) { 1389 ip_vs_unbind_scheduler(svc, sched); 1390 ip_vs_service_free(svc); 1391 } 1392 ip_vs_scheduler_put(sched); 1393 ip_vs_pe_put(pe); 1394 1395 /* decrease the module use count */ 1396 ip_vs_use_count_dec(); 1397 1398 return ret; 1399 } 1400 1401 1402 /* 1403 * Edit a service and bind it with a new scheduler 1404 */ 1405 static int 1406 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1407 { 1408 struct ip_vs_scheduler *sched = NULL, *old_sched; 1409 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1410 int ret = 0; 1411 bool 
new_pe_conn_out, old_pe_conn_out; 1412 1413 /* 1414 * Lookup the scheduler, by 'u->sched_name' 1415 */ 1416 if (strcmp(u->sched_name, "none")) { 1417 sched = ip_vs_scheduler_get(u->sched_name); 1418 if (!sched) { 1419 pr_info("Scheduler module ip_vs_%s not found\n", 1420 u->sched_name); 1421 return -ENOENT; 1422 } 1423 } 1424 old_sched = sched; 1425 1426 if (u->pe_name && *u->pe_name) { 1427 pe = ip_vs_pe_getbyname(u->pe_name); 1428 if (pe == NULL) { 1429 pr_info("persistence engine module ip_vs_pe_%s " 1430 "not found\n", u->pe_name); 1431 ret = -ENOENT; 1432 goto out; 1433 } 1434 old_pe = pe; 1435 } 1436 1437 #ifdef CONFIG_IP_VS_IPV6 1438 if (u->af == AF_INET6) { 1439 __u32 plen = (__force __u32) u->netmask; 1440 1441 if (plen < 1 || plen > 128) { 1442 ret = -EINVAL; 1443 goto out; 1444 } 1445 } 1446 #endif 1447 1448 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1449 if (sched != old_sched) { 1450 if (old_sched) { 1451 ip_vs_unbind_scheduler(svc, old_sched); 1452 RCU_INIT_POINTER(svc->scheduler, NULL); 1453 /* Wait all svc->sched_data users */ 1454 synchronize_rcu(); 1455 } 1456 /* Bind the new scheduler */ 1457 if (sched) { 1458 ret = ip_vs_bind_scheduler(svc, sched); 1459 if (ret) { 1460 ip_vs_scheduler_put(sched); 1461 goto out; 1462 } 1463 } 1464 } 1465 1466 /* 1467 * Set the flags and timeout value 1468 */ 1469 svc->flags = u->flags | IP_VS_SVC_F_HASHED; 1470 svc->timeout = u->timeout * HZ; 1471 svc->netmask = u->netmask; 1472 1473 old_pe = rcu_dereference_protected(svc->pe, 1); 1474 if (pe != old_pe) { 1475 rcu_assign_pointer(svc->pe, pe); 1476 /* check for optional methods in new pe */ 1477 new_pe_conn_out = (pe && pe->conn_out) ? true : false; 1478 old_pe_conn_out = (old_pe && old_pe->conn_out) ? 
true : false; 1479 if (new_pe_conn_out && !old_pe_conn_out) 1480 atomic_inc(&svc->ipvs->conn_out_counter); 1481 if (old_pe_conn_out && !new_pe_conn_out) 1482 atomic_dec(&svc->ipvs->conn_out_counter); 1483 } 1484 1485 out: 1486 ip_vs_scheduler_put(old_sched); 1487 ip_vs_pe_put(old_pe); 1488 return ret; 1489 } 1490 1491 /* 1492 * Delete a service from the service list 1493 * - The service must be unlinked, unlocked and not referenced! 1494 * - We are called under _bh lock 1495 */ 1496 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) 1497 { 1498 struct ip_vs_dest *dest, *nxt; 1499 struct ip_vs_scheduler *old_sched; 1500 struct ip_vs_pe *old_pe; 1501 struct netns_ipvs *ipvs = svc->ipvs; 1502 1503 /* Count only IPv4 services for old get/setsockopt interface */ 1504 if (svc->af == AF_INET) 1505 ipvs->num_services--; 1506 1507 ip_vs_stop_estimator(svc->ipvs, &svc->stats); 1508 1509 /* Unbind scheduler */ 1510 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1511 ip_vs_unbind_scheduler(svc, old_sched); 1512 ip_vs_scheduler_put(old_sched); 1513 1514 /* Unbind persistence engine, keep svc->pe */ 1515 old_pe = rcu_dereference_protected(svc->pe, 1); 1516 if (old_pe && old_pe->conn_out) 1517 atomic_dec(&ipvs->conn_out_counter); 1518 ip_vs_pe_put(old_pe); 1519 1520 /* 1521 * Unlink the whole destination list 1522 */ 1523 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1524 __ip_vs_unlink_dest(svc, dest, 0); 1525 __ip_vs_del_dest(svc->ipvs, dest, cleanup); 1526 } 1527 1528 /* 1529 * Update the virtual service counters 1530 */ 1531 if (svc->port == FTPPORT) 1532 atomic_dec(&ipvs->ftpsvc_counter); 1533 else if (svc->port == 0) 1534 atomic_dec(&ipvs->nullsvc_counter); 1535 1536 /* 1537 * Free the service if nobody refers to it 1538 */ 1539 __ip_vs_svc_put(svc, true); 1540 1541 /* decrease the module use count */ 1542 ip_vs_use_count_dec(); 1543 } 1544 1545 /* 1546 * Unlink a service from list and try to delete it if its refcnt 
reached 0 1547 */ 1548 static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) 1549 { 1550 ip_vs_unregister_conntrack(svc); 1551 /* Hold svc to avoid double release from dest_trash */ 1552 atomic_inc(&svc->refcnt); 1553 /* 1554 * Unhash it from the service table 1555 */ 1556 ip_vs_svc_unhash(svc); 1557 1558 __ip_vs_del_service(svc, cleanup); 1559 } 1560 1561 /* 1562 * Delete a service from the service list 1563 */ 1564 static int ip_vs_del_service(struct ip_vs_service *svc) 1565 { 1566 if (svc == NULL) 1567 return -EEXIST; 1568 ip_vs_unlink_service(svc, false); 1569 1570 return 0; 1571 } 1572 1573 1574 /* 1575 * Flush all the virtual services 1576 */ 1577 static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) 1578 { 1579 int idx; 1580 struct ip_vs_service *svc; 1581 struct hlist_node *n; 1582 1583 /* 1584 * Flush the service table hashed by <netns,protocol,addr,port> 1585 */ 1586 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1587 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], 1588 s_list) { 1589 if (svc->ipvs == ipvs) 1590 ip_vs_unlink_service(svc, cleanup); 1591 } 1592 } 1593 1594 /* 1595 * Flush the service table hashed by fwmark 1596 */ 1597 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1598 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], 1599 f_list) { 1600 if (svc->ipvs == ipvs) 1601 ip_vs_unlink_service(svc, cleanup); 1602 } 1603 } 1604 1605 return 0; 1606 } 1607 1608 /* 1609 * Delete service by {netns} in the service table. 
1610 * Called by __ip_vs_batch_cleanup() 1611 */ 1612 void ip_vs_service_nets_cleanup(struct list_head *net_list) 1613 { 1614 struct netns_ipvs *ipvs; 1615 struct net *net; 1616 1617 EnterFunction(2); 1618 /* Check for "full" addressed entries */ 1619 mutex_lock(&__ip_vs_mutex); 1620 list_for_each_entry(net, net_list, exit_list) { 1621 ipvs = net_ipvs(net); 1622 ip_vs_flush(ipvs, true); 1623 } 1624 mutex_unlock(&__ip_vs_mutex); 1625 LeaveFunction(2); 1626 } 1627 1628 /* Put all references for device (dst_cache) */ 1629 static inline void 1630 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) 1631 { 1632 struct ip_vs_dest_dst *dest_dst; 1633 1634 spin_lock_bh(&dest->dst_lock); 1635 dest_dst = rcu_dereference_protected(dest->dest_dst, 1); 1636 if (dest_dst && dest_dst->dst_cache->dev == dev) { 1637 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", 1638 dev->name, 1639 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1640 ntohs(dest->port), 1641 refcount_read(&dest->refcnt)); 1642 __ip_vs_dst_cache_reset(dest); 1643 } 1644 spin_unlock_bh(&dest->dst_lock); 1645 1646 } 1647 /* Netdev event receiver 1648 * Currently only NETDEV_DOWN is handled to release refs to cached dsts 1649 */ 1650 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, 1651 void *ptr) 1652 { 1653 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1654 struct net *net = dev_net(dev); 1655 struct netns_ipvs *ipvs = net_ipvs(net); 1656 struct ip_vs_service *svc; 1657 struct ip_vs_dest *dest; 1658 unsigned int idx; 1659 1660 if (event != NETDEV_DOWN || !ipvs) 1661 return NOTIFY_DONE; 1662 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); 1663 EnterFunction(2); 1664 mutex_lock(&__ip_vs_mutex); 1665 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1666 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1667 if (svc->ipvs == ipvs) { 1668 list_for_each_entry(dest, &svc->destinations, 1669 n_list) { 1670 ip_vs_forget_dev(dest, dev); 1671 } 1672 } 
1673 } 1674 1675 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1676 if (svc->ipvs == ipvs) { 1677 list_for_each_entry(dest, &svc->destinations, 1678 n_list) { 1679 ip_vs_forget_dev(dest, dev); 1680 } 1681 } 1682 1683 } 1684 } 1685 1686 spin_lock_bh(&ipvs->dest_trash_lock); 1687 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 1688 ip_vs_forget_dev(dest, dev); 1689 } 1690 spin_unlock_bh(&ipvs->dest_trash_lock); 1691 mutex_unlock(&__ip_vs_mutex); 1692 LeaveFunction(2); 1693 return NOTIFY_DONE; 1694 } 1695 1696 /* 1697 * Zero counters in a service or all services 1698 */ 1699 static int ip_vs_zero_service(struct ip_vs_service *svc) 1700 { 1701 struct ip_vs_dest *dest; 1702 1703 list_for_each_entry(dest, &svc->destinations, n_list) { 1704 ip_vs_zero_stats(&dest->stats); 1705 } 1706 ip_vs_zero_stats(&svc->stats); 1707 return 0; 1708 } 1709 1710 static int ip_vs_zero_all(struct netns_ipvs *ipvs) 1711 { 1712 int idx; 1713 struct ip_vs_service *svc; 1714 1715 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1716 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1717 if (svc->ipvs == ipvs) 1718 ip_vs_zero_service(svc); 1719 } 1720 } 1721 1722 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1723 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1724 if (svc->ipvs == ipvs) 1725 ip_vs_zero_service(svc); 1726 } 1727 } 1728 1729 ip_vs_zero_stats(&ipvs->tot_stats); 1730 return 0; 1731 } 1732 1733 #ifdef CONFIG_SYSCTL 1734 1735 static int three = 3; 1736 1737 static int 1738 proc_do_defense_mode(struct ctl_table *table, int write, 1739 void __user *buffer, size_t *lenp, loff_t *ppos) 1740 { 1741 struct netns_ipvs *ipvs = table->extra2; 1742 int *valp = table->data; 1743 int val = *valp; 1744 int rc; 1745 1746 struct ctl_table tmp = { 1747 .data = &val, 1748 .maxlen = sizeof(int), 1749 .mode = table->mode, 1750 }; 1751 1752 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1753 if (write && (*valp != val)) { 1754 if (val < 0 || val > 
3) { 1755 rc = -EINVAL; 1756 } else { 1757 *valp = val; 1758 update_defense_level(ipvs); 1759 } 1760 } 1761 return rc; 1762 } 1763 1764 static int 1765 proc_do_sync_threshold(struct ctl_table *table, int write, 1766 void __user *buffer, size_t *lenp, loff_t *ppos) 1767 { 1768 int *valp = table->data; 1769 int val[2]; 1770 int rc; 1771 struct ctl_table tmp = { 1772 .data = &val, 1773 .maxlen = table->maxlen, 1774 .mode = table->mode, 1775 }; 1776 1777 memcpy(val, valp, sizeof(val)); 1778 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1779 if (write) { 1780 if (val[0] < 0 || val[1] < 0 || 1781 (val[0] >= val[1] && val[1])) 1782 rc = -EINVAL; 1783 else 1784 memcpy(valp, val, sizeof(val)); 1785 } 1786 return rc; 1787 } 1788 1789 static int 1790 proc_do_sync_ports(struct ctl_table *table, int write, 1791 void __user *buffer, size_t *lenp, loff_t *ppos) 1792 { 1793 int *valp = table->data; 1794 int val = *valp; 1795 int rc; 1796 1797 struct ctl_table tmp = { 1798 .data = &val, 1799 .maxlen = sizeof(int), 1800 .mode = table->mode, 1801 }; 1802 1803 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1804 if (write && (*valp != val)) { 1805 if (val < 1 || !is_power_of_2(val)) 1806 rc = -EINVAL; 1807 else 1808 *valp = val; 1809 } 1810 return rc; 1811 } 1812 1813 /* 1814 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1815 * Do not change order or insert new entries without 1816 * align with netns init in ip_vs_control_net_init() 1817 */ 1818 1819 static struct ctl_table vs_vars[] = { 1820 { 1821 .procname = "amemthresh", 1822 .maxlen = sizeof(int), 1823 .mode = 0644, 1824 .proc_handler = proc_dointvec, 1825 }, 1826 { 1827 .procname = "am_droprate", 1828 .maxlen = sizeof(int), 1829 .mode = 0644, 1830 .proc_handler = proc_dointvec, 1831 }, 1832 { 1833 .procname = "drop_entry", 1834 .maxlen = sizeof(int), 1835 .mode = 0644, 1836 .proc_handler = proc_do_defense_mode, 1837 }, 1838 { 1839 .procname = "drop_packet", 1840 .maxlen = sizeof(int), 1841 .mode = 0644, 1842 
.proc_handler = proc_do_defense_mode, 1843 }, 1844 #ifdef CONFIG_IP_VS_NFCT 1845 { 1846 .procname = "conntrack", 1847 .maxlen = sizeof(int), 1848 .mode = 0644, 1849 .proc_handler = &proc_dointvec, 1850 }, 1851 #endif 1852 { 1853 .procname = "secure_tcp", 1854 .maxlen = sizeof(int), 1855 .mode = 0644, 1856 .proc_handler = proc_do_defense_mode, 1857 }, 1858 { 1859 .procname = "snat_reroute", 1860 .maxlen = sizeof(int), 1861 .mode = 0644, 1862 .proc_handler = &proc_dointvec, 1863 }, 1864 { 1865 .procname = "sync_version", 1866 .maxlen = sizeof(int), 1867 .mode = 0644, 1868 .proc_handler = proc_dointvec_minmax, 1869 .extra1 = SYSCTL_ZERO, 1870 .extra2 = SYSCTL_ONE, 1871 }, 1872 { 1873 .procname = "sync_ports", 1874 .maxlen = sizeof(int), 1875 .mode = 0644, 1876 .proc_handler = proc_do_sync_ports, 1877 }, 1878 { 1879 .procname = "sync_persist_mode", 1880 .maxlen = sizeof(int), 1881 .mode = 0644, 1882 .proc_handler = proc_dointvec, 1883 }, 1884 { 1885 .procname = "sync_qlen_max", 1886 .maxlen = sizeof(unsigned long), 1887 .mode = 0644, 1888 .proc_handler = proc_doulongvec_minmax, 1889 }, 1890 { 1891 .procname = "sync_sock_size", 1892 .maxlen = sizeof(int), 1893 .mode = 0644, 1894 .proc_handler = proc_dointvec, 1895 }, 1896 { 1897 .procname = "cache_bypass", 1898 .maxlen = sizeof(int), 1899 .mode = 0644, 1900 .proc_handler = proc_dointvec, 1901 }, 1902 { 1903 .procname = "expire_nodest_conn", 1904 .maxlen = sizeof(int), 1905 .mode = 0644, 1906 .proc_handler = proc_dointvec, 1907 }, 1908 { 1909 .procname = "sloppy_tcp", 1910 .maxlen = sizeof(int), 1911 .mode = 0644, 1912 .proc_handler = proc_dointvec, 1913 }, 1914 { 1915 .procname = "sloppy_sctp", 1916 .maxlen = sizeof(int), 1917 .mode = 0644, 1918 .proc_handler = proc_dointvec, 1919 }, 1920 { 1921 .procname = "expire_quiescent_template", 1922 .maxlen = sizeof(int), 1923 .mode = 0644, 1924 .proc_handler = proc_dointvec, 1925 }, 1926 { 1927 .procname = "sync_threshold", 1928 .maxlen = 1929 sizeof(((struct netns_ipvs 
*)0)->sysctl_sync_threshold), 1930 .mode = 0644, 1931 .proc_handler = proc_do_sync_threshold, 1932 }, 1933 { 1934 .procname = "sync_refresh_period", 1935 .maxlen = sizeof(int), 1936 .mode = 0644, 1937 .proc_handler = proc_dointvec_jiffies, 1938 }, 1939 { 1940 .procname = "sync_retries", 1941 .maxlen = sizeof(int), 1942 .mode = 0644, 1943 .proc_handler = proc_dointvec_minmax, 1944 .extra1 = SYSCTL_ZERO, 1945 .extra2 = &three, 1946 }, 1947 { 1948 .procname = "nat_icmp_send", 1949 .maxlen = sizeof(int), 1950 .mode = 0644, 1951 .proc_handler = proc_dointvec, 1952 }, 1953 { 1954 .procname = "pmtu_disc", 1955 .maxlen = sizeof(int), 1956 .mode = 0644, 1957 .proc_handler = proc_dointvec, 1958 }, 1959 { 1960 .procname = "backup_only", 1961 .maxlen = sizeof(int), 1962 .mode = 0644, 1963 .proc_handler = proc_dointvec, 1964 }, 1965 { 1966 .procname = "conn_reuse_mode", 1967 .maxlen = sizeof(int), 1968 .mode = 0644, 1969 .proc_handler = proc_dointvec, 1970 }, 1971 { 1972 .procname = "schedule_icmp", 1973 .maxlen = sizeof(int), 1974 .mode = 0644, 1975 .proc_handler = proc_dointvec, 1976 }, 1977 { 1978 .procname = "ignore_tunneled", 1979 .maxlen = sizeof(int), 1980 .mode = 0644, 1981 .proc_handler = proc_dointvec, 1982 }, 1983 #ifdef CONFIG_IP_VS_DEBUG 1984 { 1985 .procname = "debug_level", 1986 .data = &sysctl_ip_vs_debug_level, 1987 .maxlen = sizeof(int), 1988 .mode = 0644, 1989 .proc_handler = proc_dointvec, 1990 }, 1991 #endif 1992 { } 1993 }; 1994 1995 #endif 1996 1997 #ifdef CONFIG_PROC_FS 1998 1999 struct ip_vs_iter { 2000 struct seq_net_private p; /* Do not move this, netns depends upon it*/ 2001 struct hlist_head *table; 2002 int bucket; 2003 }; 2004 2005 /* 2006 * Write the contents of the VS rule table to a PROCfs file. 
2007 * (It is kept just for backward compatibility) 2008 */ 2009 static inline const char *ip_vs_fwd_name(unsigned int flags) 2010 { 2011 switch (flags & IP_VS_CONN_F_FWD_MASK) { 2012 case IP_VS_CONN_F_LOCALNODE: 2013 return "Local"; 2014 case IP_VS_CONN_F_TUNNEL: 2015 return "Tunnel"; 2016 case IP_VS_CONN_F_DROUTE: 2017 return "Route"; 2018 default: 2019 return "Masq"; 2020 } 2021 } 2022 2023 2024 /* Get the Nth entry in the two lists */ 2025 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 2026 { 2027 struct net *net = seq_file_net(seq); 2028 struct netns_ipvs *ipvs = net_ipvs(net); 2029 struct ip_vs_iter *iter = seq->private; 2030 int idx; 2031 struct ip_vs_service *svc; 2032 2033 /* look in hash by protocol */ 2034 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2035 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { 2036 if ((svc->ipvs == ipvs) && pos-- == 0) { 2037 iter->table = ip_vs_svc_table; 2038 iter->bucket = idx; 2039 return svc; 2040 } 2041 } 2042 } 2043 2044 /* keep looking in fwmark */ 2045 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2046 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], 2047 f_list) { 2048 if ((svc->ipvs == ipvs) && pos-- == 0) { 2049 iter->table = ip_vs_svc_fwm_table; 2050 iter->bucket = idx; 2051 return svc; 2052 } 2053 } 2054 } 2055 2056 return NULL; 2057 } 2058 2059 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 2060 __acquires(RCU) 2061 { 2062 rcu_read_lock(); 2063 return *pos ? 
ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 2064 } 2065 2066 2067 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2068 { 2069 struct hlist_node *e; 2070 struct ip_vs_iter *iter; 2071 struct ip_vs_service *svc; 2072 2073 ++*pos; 2074 if (v == SEQ_START_TOKEN) 2075 return ip_vs_info_array(seq,0); 2076 2077 svc = v; 2078 iter = seq->private; 2079 2080 if (iter->table == ip_vs_svc_table) { 2081 /* next service in table hashed by protocol */ 2082 e = rcu_dereference(hlist_next_rcu(&svc->s_list)); 2083 if (e) 2084 return hlist_entry(e, struct ip_vs_service, s_list); 2085 2086 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 2087 hlist_for_each_entry_rcu(svc, 2088 &ip_vs_svc_table[iter->bucket], 2089 s_list) { 2090 return svc; 2091 } 2092 } 2093 2094 iter->table = ip_vs_svc_fwm_table; 2095 iter->bucket = -1; 2096 goto scan_fwmark; 2097 } 2098 2099 /* next service in hashed by fwmark */ 2100 e = rcu_dereference(hlist_next_rcu(&svc->f_list)); 2101 if (e) 2102 return hlist_entry(e, struct ip_vs_service, f_list); 2103 2104 scan_fwmark: 2105 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 2106 hlist_for_each_entry_rcu(svc, 2107 &ip_vs_svc_fwm_table[iter->bucket], 2108 f_list) 2109 return svc; 2110 } 2111 2112 return NULL; 2113 } 2114 2115 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 2116 __releases(RCU) 2117 { 2118 rcu_read_unlock(); 2119 } 2120 2121 2122 static int ip_vs_info_seq_show(struct seq_file *seq, void *v) 2123 { 2124 if (v == SEQ_START_TOKEN) { 2125 seq_printf(seq, 2126 "IP Virtual Server version %d.%d.%d (size=%d)\n", 2127 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2128 seq_puts(seq, 2129 "Prot LocalAddress:Port Scheduler Flags\n"); 2130 seq_puts(seq, 2131 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 2132 } else { 2133 struct net *net = seq_file_net(seq); 2134 struct netns_ipvs *ipvs = net_ipvs(net); 2135 const struct ip_vs_service *svc = v; 2136 const struct ip_vs_iter *iter = 
seq->private; 2137 const struct ip_vs_dest *dest; 2138 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 2139 char *sched_name = sched ? sched->name : "none"; 2140 2141 if (svc->ipvs != ipvs) 2142 return 0; 2143 if (iter->table == ip_vs_svc_table) { 2144 #ifdef CONFIG_IP_VS_IPV6 2145 if (svc->af == AF_INET6) 2146 seq_printf(seq, "%s [%pI6]:%04X %s ", 2147 ip_vs_proto_name(svc->protocol), 2148 &svc->addr.in6, 2149 ntohs(svc->port), 2150 sched_name); 2151 else 2152 #endif 2153 seq_printf(seq, "%s %08X:%04X %s %s ", 2154 ip_vs_proto_name(svc->protocol), 2155 ntohl(svc->addr.ip), 2156 ntohs(svc->port), 2157 sched_name, 2158 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2159 } else { 2160 seq_printf(seq, "FWM %08X %s %s", 2161 svc->fwmark, sched_name, 2162 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2163 } 2164 2165 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 2166 seq_printf(seq, "persistent %d %08X\n", 2167 svc->timeout, 2168 ntohl(svc->netmask)); 2169 else 2170 seq_putc(seq, '\n'); 2171 2172 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 2173 #ifdef CONFIG_IP_VS_IPV6 2174 if (dest->af == AF_INET6) 2175 seq_printf(seq, 2176 " -> [%pI6]:%04X" 2177 " %-7s %-6d %-10d %-10d\n", 2178 &dest->addr.in6, 2179 ntohs(dest->port), 2180 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2181 atomic_read(&dest->weight), 2182 atomic_read(&dest->activeconns), 2183 atomic_read(&dest->inactconns)); 2184 else 2185 #endif 2186 seq_printf(seq, 2187 " -> %08X:%04X " 2188 "%-7s %-6d %-10d %-10d\n", 2189 ntohl(dest->addr.ip), 2190 ntohs(dest->port), 2191 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2192 atomic_read(&dest->weight), 2193 atomic_read(&dest->activeconns), 2194 atomic_read(&dest->inactconns)); 2195 2196 } 2197 } 2198 return 0; 2199 } 2200 2201 static const struct seq_operations ip_vs_info_seq_ops = { 2202 .start = ip_vs_info_seq_start, 2203 .next = ip_vs_info_seq_next, 2204 .stop = ip_vs_info_seq_stop, 2205 .show = ip_vs_info_seq_show, 2206 }; 2207 
2208 static int ip_vs_stats_show(struct seq_file *seq, void *v) 2209 { 2210 struct net *net = seq_file_single_net(seq); 2211 struct ip_vs_kstats show; 2212 2213 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2214 seq_puts(seq, 2215 " Total Incoming Outgoing Incoming Outgoing\n"); 2216 seq_puts(seq, 2217 " Conns Packets Packets Bytes Bytes\n"); 2218 2219 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); 2220 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", 2221 (unsigned long long)show.conns, 2222 (unsigned long long)show.inpkts, 2223 (unsigned long long)show.outpkts, 2224 (unsigned long long)show.inbytes, 2225 (unsigned long long)show.outbytes); 2226 2227 /* 01234567 01234567 01234567 0123456701234567 0123456701234567*/ 2228 seq_puts(seq, 2229 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2230 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n", 2231 (unsigned long long)show.cps, 2232 (unsigned long long)show.inpps, 2233 (unsigned long long)show.outpps, 2234 (unsigned long long)show.inbps, 2235 (unsigned long long)show.outbps); 2236 2237 return 0; 2238 } 2239 2240 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) 2241 { 2242 struct net *net = seq_file_single_net(seq); 2243 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; 2244 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; 2245 struct ip_vs_kstats kstats; 2246 int i; 2247 2248 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2249 seq_puts(seq, 2250 " Total Incoming Outgoing Incoming Outgoing\n"); 2251 seq_puts(seq, 2252 "CPU Conns Packets Packets Bytes Bytes\n"); 2253 2254 for_each_possible_cpu(i) { 2255 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); 2256 unsigned int start; 2257 u64 conns, inpkts, outpkts, inbytes, outbytes; 2258 2259 do { 2260 start = u64_stats_fetch_begin_irq(&u->syncp); 2261 conns = u->cnt.conns; 2262 inpkts = u->cnt.inpkts; 2263 outpkts = u->cnt.outpkts; 2264 inbytes = u->cnt.inbytes; 2265 outbytes = 
u->cnt.outbytes; 2266 } while (u64_stats_fetch_retry_irq(&u->syncp, start)); 2267 2268 seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", 2269 i, (u64)conns, (u64)inpkts, 2270 (u64)outpkts, (u64)inbytes, 2271 (u64)outbytes); 2272 } 2273 2274 ip_vs_copy_stats(&kstats, tot_stats); 2275 2276 seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n", 2277 (unsigned long long)kstats.conns, 2278 (unsigned long long)kstats.inpkts, 2279 (unsigned long long)kstats.outpkts, 2280 (unsigned long long)kstats.inbytes, 2281 (unsigned long long)kstats.outbytes); 2282 2283 /* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2284 seq_puts(seq, 2285 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2286 seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n", 2287 kstats.cps, 2288 kstats.inpps, 2289 kstats.outpps, 2290 kstats.inbps, 2291 kstats.outbps); 2292 2293 return 0; 2294 } 2295 #endif 2296 2297 /* 2298 * Set timeout values for tcp tcpfin udp in the timeout_table. 2299 */ 2300 static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2301 { 2302 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2303 struct ip_vs_proto_data *pd; 2304 #endif 2305 2306 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2307 u->tcp_timeout, 2308 u->tcp_fin_timeout, 2309 u->udp_timeout); 2310 2311 #ifdef CONFIG_IP_VS_PROTO_TCP 2312 if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || 2313 u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { 2314 return -EINVAL; 2315 } 2316 #endif 2317 2318 #ifdef CONFIG_IP_VS_PROTO_UDP 2319 if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) 2320 return -EINVAL; 2321 #endif 2322 2323 #ifdef CONFIG_IP_VS_PROTO_TCP 2324 if (u->tcp_timeout) { 2325 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2326 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] 2327 = u->tcp_timeout * HZ; 2328 } 2329 2330 if (u->tcp_fin_timeout) { 2331 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2332 
pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 2333 = u->tcp_fin_timeout * HZ; 2334 } 2335 #endif 2336 2337 #ifdef CONFIG_IP_VS_PROTO_UDP 2338 if (u->udp_timeout) { 2339 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); 2340 pd->timeout_table[IP_VS_UDP_S_NORMAL] 2341 = u->udp_timeout * HZ; 2342 } 2343 #endif 2344 return 0; 2345 } 2346 2347 #define CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2348 2349 struct ip_vs_svcdest_user { 2350 struct ip_vs_service_user s; 2351 struct ip_vs_dest_user d; 2352 }; 2353 2354 static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = { 2355 [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user), 2356 [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user), 2357 [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user), 2358 [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user), 2359 [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user), 2360 [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user), 2361 [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2362 [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user), 2363 [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user), 2364 [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user), 2365 }; 2366 2367 union ip_vs_set_arglen { 2368 struct ip_vs_service_user field_IP_VS_SO_SET_ADD; 2369 struct ip_vs_service_user field_IP_VS_SO_SET_EDIT; 2370 struct ip_vs_service_user field_IP_VS_SO_SET_DEL; 2371 struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST; 2372 struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST; 2373 struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST; 2374 struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT; 2375 struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON; 2376 struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON; 2377 struct ip_vs_service_user field_IP_VS_SO_SET_ZERO; 2378 }; 2379 2380 #define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen) 2381 2382 static void 
ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2383 struct ip_vs_service_user *usvc_compat) 2384 { 2385 memset(usvc, 0, sizeof(*usvc)); 2386 2387 usvc->af = AF_INET; 2388 usvc->protocol = usvc_compat->protocol; 2389 usvc->addr.ip = usvc_compat->addr; 2390 usvc->port = usvc_compat->port; 2391 usvc->fwmark = usvc_compat->fwmark; 2392 2393 /* Deep copy of sched_name is not needed here */ 2394 usvc->sched_name = usvc_compat->sched_name; 2395 2396 usvc->flags = usvc_compat->flags; 2397 usvc->timeout = usvc_compat->timeout; 2398 usvc->netmask = usvc_compat->netmask; 2399 } 2400 2401 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2402 struct ip_vs_dest_user *udest_compat) 2403 { 2404 memset(udest, 0, sizeof(*udest)); 2405 2406 udest->addr.ip = udest_compat->addr; 2407 udest->port = udest_compat->port; 2408 udest->conn_flags = udest_compat->conn_flags; 2409 udest->weight = udest_compat->weight; 2410 udest->u_threshold = udest_compat->u_threshold; 2411 udest->l_threshold = udest_compat->l_threshold; 2412 udest->af = AF_INET; 2413 udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP; 2414 } 2415 2416 static int 2417 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 2418 { 2419 struct net *net = sock_net(sk); 2420 int ret; 2421 unsigned char arg[MAX_SET_ARGLEN]; 2422 struct ip_vs_service_user *usvc_compat; 2423 struct ip_vs_service_user_kern usvc; 2424 struct ip_vs_service *svc; 2425 struct ip_vs_dest_user *udest_compat; 2426 struct ip_vs_dest_user_kern udest; 2427 struct netns_ipvs *ipvs = net_ipvs(net); 2428 2429 BUILD_BUG_ON(sizeof(arg) > 255); 2430 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2431 return -EPERM; 2432 2433 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX) 2434 return -EINVAL; 2435 if (len != set_arglen[CMDID(cmd)]) { 2436 IP_VS_DBG(1, "set_ctl: len %u != %u\n", 2437 len, set_arglen[CMDID(cmd)]); 2438 return -EINVAL; 2439 } 2440 2441 if (copy_from_user(arg, user, len) != 0) 2442 
return -EFAULT;

	/* Handle daemons since they have another lock */
	if (cmd == IP_VS_SO_SET_STARTDAEMON ||
	    cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;

		if (cmd == IP_VS_SO_SET_STARTDAEMON) {
			struct ipvs_sync_daemon_cfg cfg;

			memset(&cfg, 0, sizeof(cfg));
			ret = -EINVAL;
			/* strscpy() result <= 0 (empty or truncated/invalid
			 * interface name) is rejected with -EINVAL
			 */
			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
				    sizeof(cfg.mcast_ifn)) <= 0)
				return ret;
			cfg.syncid = dm->syncid;
			ret = start_sync_thread(ipvs, &cfg, dm->state);
		} else {
			ret = stop_sync_thread(ipvs, dm->state);
		}
		return ret;
	}

	/* Everything below runs under __ip_vs_mutex and leaves through
	 * out_unlock.
	 */
	mutex_lock(&__ip_vs_mutex);
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush(ipvs, false);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	}

	/* The destination part, when present, directly follows the service
	 * part in arg (see struct ip_vs_svcdest_user).
	 */
	usvc_compat = (struct ip_vs_service_user *)arg;
	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);

	/* We only use the new structs internally, so copy userspace compat
	 * structs to extended internal versions */
	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
	ip_vs_copy_udest_compat(&udest, udest_compat);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
			ret = ip_vs_zero_all(ipvs);
			goto out_unlock;
		}
	}

	/* For ADD/EDIT the scheduler name must contain a NUL within its
	 * first IP_VS_SCHEDNAME_MAXLEN bytes.
	 */
	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
	    IP_VS_SCHEDNAME_MAXLEN) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
	    usvc.protocol != IPPROTO_SCTP) {
		pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
		       usvc.protocol, &usvc.addr.ip,
		       ntohs(usvc.port));
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	rcu_read_lock();
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
					   &usvc.addr, usvc.port);
	else
		svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
	rcu_read_unlock();

	/* Every command except ADD needs an existing service whose protocol
	 * matches the request.
	 */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc.protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(ipvs, &usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		/* both the goto and the break end up at out_unlock */
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	default:
		ret = -EINVAL;
	}

  out_unlock:
	mutex_unlock(&__ip_vs_mutex);
	return ret;
}


/* Fill the sockopt service entry @dst from the service @src.  A service
 * without a scheduler is reported with the name "none".
 */
static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
	struct ip_vs_scheduler *sched;
	struct ip_vs_kstats kstats;
	char *sched_name;

	sched = rcu_dereference_protected(src->scheduler, 1);
	sched_name = sched ?
sched->name : "none"; 2570 dst->protocol = src->protocol; 2571 dst->addr = src->addr.ip; 2572 dst->port = src->port; 2573 dst->fwmark = src->fwmark; 2574 strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); 2575 dst->flags = src->flags; 2576 dst->timeout = src->timeout / HZ; 2577 dst->netmask = src->netmask; 2578 dst->num_dests = src->num_dests; 2579 ip_vs_copy_stats(&kstats, &src->stats); 2580 ip_vs_export_stats_user(&dst->stats, &kstats); 2581 } 2582 2583 static inline int 2584 __ip_vs_get_service_entries(struct netns_ipvs *ipvs, 2585 const struct ip_vs_get_services *get, 2586 struct ip_vs_get_services __user *uptr) 2587 { 2588 int idx, count=0; 2589 struct ip_vs_service *svc; 2590 struct ip_vs_service_entry entry; 2591 int ret = 0; 2592 2593 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2594 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2595 /* Only expose IPv4 entries to old interface */ 2596 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2597 continue; 2598 2599 if (count >= get->num_services) 2600 goto out; 2601 memset(&entry, 0, sizeof(entry)); 2602 ip_vs_copy_service(&entry, svc); 2603 if (copy_to_user(&uptr->entrytable[count], 2604 &entry, sizeof(entry))) { 2605 ret = -EFAULT; 2606 goto out; 2607 } 2608 count++; 2609 } 2610 } 2611 2612 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2613 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2614 /* Only expose IPv4 entries to old interface */ 2615 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2616 continue; 2617 2618 if (count >= get->num_services) 2619 goto out; 2620 memset(&entry, 0, sizeof(entry)); 2621 ip_vs_copy_service(&entry, svc); 2622 if (copy_to_user(&uptr->entrytable[count], 2623 &entry, sizeof(entry))) { 2624 ret = -EFAULT; 2625 goto out; 2626 } 2627 count++; 2628 } 2629 } 2630 out: 2631 return ret; 2632 } 2633 2634 static inline int 2635 __ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get, 2636 struct ip_vs_get_dests __user 
*uptr) 2637 { 2638 struct ip_vs_service *svc; 2639 union nf_inet_addr addr = { .ip = get->addr }; 2640 int ret = 0; 2641 2642 rcu_read_lock(); 2643 if (get->fwmark) 2644 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark); 2645 else 2646 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr, 2647 get->port); 2648 rcu_read_unlock(); 2649 2650 if (svc) { 2651 int count = 0; 2652 struct ip_vs_dest *dest; 2653 struct ip_vs_dest_entry entry; 2654 struct ip_vs_kstats kstats; 2655 2656 memset(&entry, 0, sizeof(entry)); 2657 list_for_each_entry(dest, &svc->destinations, n_list) { 2658 if (count >= get->num_dests) 2659 break; 2660 2661 /* Cannot expose heterogeneous members via sockopt 2662 * interface 2663 */ 2664 if (dest->af != svc->af) 2665 continue; 2666 2667 entry.addr = dest->addr.ip; 2668 entry.port = dest->port; 2669 entry.conn_flags = atomic_read(&dest->conn_flags); 2670 entry.weight = atomic_read(&dest->weight); 2671 entry.u_threshold = dest->u_threshold; 2672 entry.l_threshold = dest->l_threshold; 2673 entry.activeconns = atomic_read(&dest->activeconns); 2674 entry.inactconns = atomic_read(&dest->inactconns); 2675 entry.persistconns = atomic_read(&dest->persistconns); 2676 ip_vs_copy_stats(&kstats, &dest->stats); 2677 ip_vs_export_stats_user(&entry.stats, &kstats); 2678 if (copy_to_user(&uptr->entrytable[count], 2679 &entry, sizeof(entry))) { 2680 ret = -EFAULT; 2681 break; 2682 } 2683 count++; 2684 } 2685 } else 2686 ret = -ESRCH; 2687 return ret; 2688 } 2689 2690 static inline void 2691 __ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2692 { 2693 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2694 struct ip_vs_proto_data *pd; 2695 #endif 2696 2697 memset(u, 0, sizeof (*u)); 2698 2699 #ifdef CONFIG_IP_VS_PROTO_TCP 2700 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2701 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2702 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 
/ HZ; 2703 #endif 2704 #ifdef CONFIG_IP_VS_PROTO_UDP 2705 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); 2706 u->udp_timeout = 2707 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2708 #endif 2709 } 2710 2711 static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = { 2712 [CMDID(IP_VS_SO_GET_VERSION)] = 64, 2713 [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo), 2714 [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services), 2715 [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry), 2716 [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests), 2717 [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2718 [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user), 2719 }; 2720 2721 union ip_vs_get_arglen { 2722 char field_IP_VS_SO_GET_VERSION[64]; 2723 struct ip_vs_getinfo field_IP_VS_SO_GET_INFO; 2724 struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES; 2725 struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE; 2726 struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS; 2727 struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT; 2728 struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2]; 2729 }; 2730 2731 #define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen) 2732 2733 static int 2734 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 2735 { 2736 unsigned char arg[MAX_GET_ARGLEN]; 2737 int ret = 0; 2738 unsigned int copylen; 2739 struct net *net = sock_net(sk); 2740 struct netns_ipvs *ipvs = net_ipvs(net); 2741 2742 BUG_ON(!net); 2743 BUILD_BUG_ON(sizeof(arg) > 255); 2744 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2745 return -EPERM; 2746 2747 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX) 2748 return -EINVAL; 2749 2750 copylen = get_arglen[CMDID(cmd)]; 2751 if (*len < (int) copylen) { 2752 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen); 2753 return -EINVAL; 2754 } 2755 2756 if (copy_from_user(arg, user, copylen) != 0) 2757 return -EFAULT; 2758 /* 2759 * 
Handle daemons first since it has its own locking 2760 */ 2761 if (cmd == IP_VS_SO_GET_DAEMON) { 2762 struct ip_vs_daemon_user d[2]; 2763 2764 memset(&d, 0, sizeof(d)); 2765 mutex_lock(&ipvs->sync_mutex); 2766 if (ipvs->sync_state & IP_VS_STATE_MASTER) { 2767 d[0].state = IP_VS_STATE_MASTER; 2768 strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, 2769 sizeof(d[0].mcast_ifn)); 2770 d[0].syncid = ipvs->mcfg.syncid; 2771 } 2772 if (ipvs->sync_state & IP_VS_STATE_BACKUP) { 2773 d[1].state = IP_VS_STATE_BACKUP; 2774 strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, 2775 sizeof(d[1].mcast_ifn)); 2776 d[1].syncid = ipvs->bcfg.syncid; 2777 } 2778 if (copy_to_user(user, &d, sizeof(d)) != 0) 2779 ret = -EFAULT; 2780 mutex_unlock(&ipvs->sync_mutex); 2781 return ret; 2782 } 2783 2784 mutex_lock(&__ip_vs_mutex); 2785 switch (cmd) { 2786 case IP_VS_SO_GET_VERSION: 2787 { 2788 char buf[64]; 2789 2790 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2791 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2792 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2793 ret = -EFAULT; 2794 goto out; 2795 } 2796 *len = strlen(buf)+1; 2797 } 2798 break; 2799 2800 case IP_VS_SO_GET_INFO: 2801 { 2802 struct ip_vs_getinfo info; 2803 info.version = IP_VS_VERSION_CODE; 2804 info.size = ip_vs_conn_tab_size; 2805 info.num_services = ipvs->num_services; 2806 if (copy_to_user(user, &info, sizeof(info)) != 0) 2807 ret = -EFAULT; 2808 } 2809 break; 2810 2811 case IP_VS_SO_GET_SERVICES: 2812 { 2813 struct ip_vs_get_services *get; 2814 int size; 2815 2816 get = (struct ip_vs_get_services *)arg; 2817 size = struct_size(get, entrytable, get->num_services); 2818 if (*len != size) { 2819 pr_err("length: %u != %u\n", *len, size); 2820 ret = -EINVAL; 2821 goto out; 2822 } 2823 ret = __ip_vs_get_service_entries(ipvs, get, user); 2824 } 2825 break; 2826 2827 case IP_VS_SO_GET_SERVICE: 2828 { 2829 struct ip_vs_service_entry *entry; 2830 struct ip_vs_service *svc; 2831 union nf_inet_addr addr; 2832 2833 
entry = (struct ip_vs_service_entry *)arg; 2834 addr.ip = entry->addr; 2835 rcu_read_lock(); 2836 if (entry->fwmark) 2837 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark); 2838 else 2839 svc = __ip_vs_service_find(ipvs, AF_INET, 2840 entry->protocol, &addr, 2841 entry->port); 2842 rcu_read_unlock(); 2843 if (svc) { 2844 ip_vs_copy_service(entry, svc); 2845 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2846 ret = -EFAULT; 2847 } else 2848 ret = -ESRCH; 2849 } 2850 break; 2851 2852 case IP_VS_SO_GET_DESTS: 2853 { 2854 struct ip_vs_get_dests *get; 2855 int size; 2856 2857 get = (struct ip_vs_get_dests *)arg; 2858 size = struct_size(get, entrytable, get->num_dests); 2859 if (*len != size) { 2860 pr_err("length: %u != %u\n", *len, size); 2861 ret = -EINVAL; 2862 goto out; 2863 } 2864 ret = __ip_vs_get_dest_entries(ipvs, get, user); 2865 } 2866 break; 2867 2868 case IP_VS_SO_GET_TIMEOUT: 2869 { 2870 struct ip_vs_timeout_user t; 2871 2872 __ip_vs_get_timeouts(ipvs, &t); 2873 if (copy_to_user(user, &t, sizeof(t)) != 0) 2874 ret = -EFAULT; 2875 } 2876 break; 2877 2878 default: 2879 ret = -EINVAL; 2880 } 2881 2882 out: 2883 mutex_unlock(&__ip_vs_mutex); 2884 return ret; 2885 } 2886 2887 2888 static struct nf_sockopt_ops ip_vs_sockopts = { 2889 .pf = PF_INET, 2890 .set_optmin = IP_VS_BASE_CTL, 2891 .set_optmax = IP_VS_SO_SET_MAX+1, 2892 .set = do_ip_vs_set_ctl, 2893 .get_optmin = IP_VS_BASE_CTL, 2894 .get_optmax = IP_VS_SO_GET_MAX+1, 2895 .get = do_ip_vs_get_ctl, 2896 .owner = THIS_MODULE, 2897 }; 2898 2899 /* 2900 * Generic Netlink interface 2901 */ 2902 2903 /* IPVS genetlink family */ 2904 static struct genl_family ip_vs_genl_family; 2905 2906 /* Policy used for first-level command attributes */ 2907 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2908 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, 2909 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, 2910 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, 2911 
[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN - 1 },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_SYNC_MAXLEN]	= { .type = NLA_U16 },
	[IPVS_DAEMON_ATTR_MCAST_GROUP]	= { .type = NLA_U32 },
	/* only a length is given for this entry, no .type */
	[IPVS_DAEMON_ATTR_MCAST_GROUP6]	= { .len = sizeof(struct in6_addr) },
	[IPVS_DAEMON_ATTR_MCAST_PORT]	= { .type = NLA_U16 },
	[IPVS_DAEMON_ATTR_MCAST_TTL]	= { .type = NLA_U8 },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
	/* NOTE(review): unlike SCHED_NAME and MCAST_IFN this length has no
	 * "- 1" -- confirm that is intended for the PE name.
	 */
	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IP_VS_PENAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};

/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
.len = sizeof(union nf_inet_addr) }, 2952 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, 2953 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, 2954 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, 2955 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, 2956 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, 2957 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, 2958 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, 2959 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, 2960 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, 2961 [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, 2962 [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 }, 2963 [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 }, 2964 [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 }, 2965 }; 2966 2967 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 2968 struct ip_vs_kstats *kstats) 2969 { 2970 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); 2971 2972 if (!nl_stats) 2973 return -EMSGSIZE; 2974 2975 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || 2976 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || 2977 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || 2978 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 2979 IPVS_STATS_ATTR_PAD) || 2980 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 2981 IPVS_STATS_ATTR_PAD) || 2982 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || 2983 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || 2984 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || 2985 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) || 2986 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps)) 2987 goto nla_put_failure; 2988 nla_nest_end(skb, nl_stats); 2989 2990 return 0; 2991 2992 nla_put_failure: 2993 nla_nest_cancel(skb, nl_stats); 2994 return -EMSGSIZE; 2995 } 2996 2997 static int ip_vs_genl_fill_stats64(struct sk_buff 
*skb, int container_type, 2998 struct ip_vs_kstats *kstats) 2999 { 3000 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); 3001 3002 if (!nl_stats) 3003 return -EMSGSIZE; 3004 3005 if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns, 3006 IPVS_STATS_ATTR_PAD) || 3007 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts, 3008 IPVS_STATS_ATTR_PAD) || 3009 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts, 3010 IPVS_STATS_ATTR_PAD) || 3011 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 3012 IPVS_STATS_ATTR_PAD) || 3013 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 3014 IPVS_STATS_ATTR_PAD) || 3015 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps, 3016 IPVS_STATS_ATTR_PAD) || 3017 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps, 3018 IPVS_STATS_ATTR_PAD) || 3019 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps, 3020 IPVS_STATS_ATTR_PAD) || 3021 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps, 3022 IPVS_STATS_ATTR_PAD) || 3023 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps, 3024 IPVS_STATS_ATTR_PAD)) 3025 goto nla_put_failure; 3026 nla_nest_end(skb, nl_stats); 3027 3028 return 0; 3029 3030 nla_put_failure: 3031 nla_nest_cancel(skb, nl_stats); 3032 return -EMSGSIZE; 3033 } 3034 3035 static int ip_vs_genl_fill_service(struct sk_buff *skb, 3036 struct ip_vs_service *svc) 3037 { 3038 struct ip_vs_scheduler *sched; 3039 struct ip_vs_pe *pe; 3040 struct nlattr *nl_service; 3041 struct ip_vs_flags flags = { .flags = svc->flags, 3042 .mask = ~0 }; 3043 struct ip_vs_kstats kstats; 3044 char *sched_name; 3045 3046 nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE); 3047 if (!nl_service) 3048 return -EMSGSIZE; 3049 3050 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af)) 3051 goto nla_put_failure; 3052 if (svc->fwmark) { 3053 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark)) 3054 goto nla_put_failure; 3055 } else { 
		/* non-fwmark services are identified by protocol/addr/port */
		if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
		    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
		    nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
			goto nla_put_failure;
	}

	sched = rcu_dereference_protected(svc->scheduler, 1);
	/* a service without scheduler is reported as "none" */
	sched_name = sched ? sched->name : "none";
	pe = rcu_dereference_protected(svc->pe, 1);
	/* PE_NAME is only emitted when a pe is attached */
	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
		goto nla_put_failure;
	/* the same snapshot feeds both the 32-bit and the 64-bit nests */
	ip_vs_copy_stats(&kstats, &svc->stats);
	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
		goto nla_put_failure;
	if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_service);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_service);
	return -EMSGSIZE;
}

/* Add one IPVS_CMD_NEW_SERVICE multipart message describing @svc to the
 * dump @skb.  Returns 0, or -EMSGSIZE when the header or the service
 * attributes do not fit (the partial message is cancelled).
 */
static int ip_vs_genl_dump_service(struct sk_buff *skb,
				   struct ip_vs_service *svc,
				   struct netlink_callback *cb)
{
	void *hdr;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family, NLM_F_MULTI,
			  IPVS_CMD_NEW_SERVICE);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_service(skb, svc) < 0)
		goto nla_put_failure;

	genlmsg_end(skb, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

/* Netlink dump callback for services.  cb->args[0] carries the number of
 * table entries already handled by earlier invocations.
 */
static int ip_vs_genl_dump_services(struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	int idx = 0, i;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

mutex_lock(&__ip_vs_mutex);
	/* idx counts every entry walked (++idx runs before the netns check),
	 * so entries of other netns still advance the resume position.
	 */
	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
			if (++idx <= start || (svc->ipvs != ipvs))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				/* un-count the entry that did not fit */
				idx--;
				goto nla_put_failure;
			}
		}
	}

	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
			if (++idx <= start || (svc->ipvs != ipvs))
				continue;
			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
				idx--;
				goto nla_put_failure;
			}
		}
	}

	/* reached on success as well as on failure */
nla_put_failure:
	mutex_unlock(&__ip_vs_mutex);
	cb->args[0] = idx;

	return skb->len;
}

/* AF_INET is always accepted; AF_INET6 only when CONFIG_IP_VS_IPV6 is set
 * and ipv6_mod_enabled() returns true.
 */
static bool ip_vs_is_af_valid(int af)
{
	if (af == AF_INET)
		return true;
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6 && ipv6_mod_enabled())
		return true;
#endif
	return false;
}

/* Parse the nested IPVS_CMD_ATTR_SERVICE attribute @nla into @usvc and
 * look up the matching service, stored in *@ret_svc (NULL when none).
 * With @full_entry the scheduler name, flags, timeout and netmask
 * attributes are mandatory as well.
 *
 * Returns 0, -EINVAL on missing/invalid attributes, or -EAFNOSUPPORT for
 * an address family rejected by ip_vs_is_af_valid().
 */
static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
				    struct ip_vs_service_user_kern *usvc,
				    struct nlattr *nla, bool full_entry,
				    struct ip_vs_service **ret_svc)
{
	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
	struct ip_vs_service *svc;

	/* Parse mandatory identifying service fields first */
	if (nla == NULL ||
	    nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL))
		return -EINVAL;

	nla_af		= attrs[IPVS_SVC_ATTR_AF];
	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];

	/* af is always required, plus either fwmark or the full
	 * <protocol, addr, port> triple
	 */
	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
		return -EINVAL;

	memset(usvc, 0, sizeof(*usvc));

	usvc->af = nla_get_u16(nla_af);
	if (!ip_vs_is_af_valid(usvc->af))
		return
-EAFNOSUPPORT;

	if (nla_fwmark) {
		/* fwmark services get a fixed protocol of IPPROTO_TCP */
		usvc->protocol = IPPROTO_TCP;
		usvc->fwmark = nla_get_u32(nla_fwmark);
	} else {
		usvc->protocol = nla_get_u16(nla_protocol);
		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
		usvc->port = nla_get_be16(nla_port);
		usvc->fwmark = 0;
	}

	rcu_read_lock();
	if (usvc->fwmark)
		svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
	else
		svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
					   &usvc->addr, usvc->port);
	rcu_read_unlock();
	/* NULL when no such service exists; that is not an error here */
	*ret_svc = svc;

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
			      *nla_netmask;
		struct ip_vs_flags flags;

		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
		nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];

		/* the PE name is the only optional attribute of a full entry */
		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
			return -EINVAL;

		nla_memcpy(&flags, nla_flags, sizeof(flags));

		/* prefill flags from service if it already exists */
		if (svc)
			usvc->flags = svc->flags;

		/* set new flags from userland */
		usvc->flags = (usvc->flags & ~flags.mask) |
			      (flags.flags & flags.mask);
		/* names point into the netlink attribute payload, no copy */
		usvc->sched_name = nla_data(nla_sched);
		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
		usvc->timeout = nla_get_u32(nla_timeout);
		usvc->netmask = nla_get_be32(nla_netmask);
	}

	return 0;
}

/* Look up the service identified by the nested attribute @nla.
 * Returns the service, NULL when it does not exist, or an ERR_PTR()
 * carrying the parse error.
 */
static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
						     struct nlattr *nla)
{
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	int ret;

	ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
	return ret ?
ERR_PTR(ret) : svc;
}

/* Emit @dest as a nested IPVS_CMD_ATTR_DEST attribute, including both the
 * 32-bit and the 64-bit statistics nests.
 *
 * Returns 0, or -EMSGSIZE when something does not fit (the nest is
 * cancelled in that case).
 */
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
	struct nlattr *nl_dest;
	struct ip_vs_kstats kstats;

	nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST);
	if (!nl_dest)
		return -EMSGSIZE;

	/* FWD_METHOD carries only the IP_VS_CONN_F_FWD_MASK bits of
	 * conn_flags
	 */
	if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
	    nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
			(atomic_read(&dest->conn_flags) &
			 IP_VS_CONN_F_FWD_MASK)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
			atomic_read(&dest->weight)) ||
	    nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
		       dest->tun_type) ||
	    nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
			 dest->tun_port) ||
	    nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
			dest->tun_flags) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
			atomic_read(&dest->activeconns)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
			atomic_read(&dest->inactconns)) ||
	    nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
			atomic_read(&dest->persistconns)) ||
	    nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
		goto nla_put_failure;
	/* the same snapshot feeds both the 32-bit and the 64-bit nests */
	ip_vs_copy_stats(&kstats, &dest->stats);
	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
		goto nla_put_failure;
	if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_dest);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_dest);
	return -EMSGSIZE;
}

/* Add one IPVS_CMD_NEW_DEST multipart message describing @dest to the
 * dump @skb.  Returns 0 or -EMSGSIZE.
 */
static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
				struct netlink_callback *cb)
{
	void *hdr;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family,
NLM_F_MULTI,
			  IPVS_CMD_NEW_DEST);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_dest(skb, dest) < 0)
		goto nla_put_failure;

	genlmsg_end(skb, hdr);
	return 0;

nla_put_failure:
	/* drop the partially built message */
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

/* Netlink dump callback for the destinations of one service.  The service
 * is identified by the IPVS_CMD_ATTR_SERVICE attribute of the request;
 * cb->args[0] carries the number of destinations already handled.  When
 * the request cannot be parsed or the service is not found, nothing is
 * added and cb->args[0] is left untouched.
 */
static int ip_vs_genl_dump_dests(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	int idx = 0;
	int start = cb->args[0];
	struct ip_vs_service *svc;
	struct ip_vs_dest *dest;
	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&__ip_vs_mutex);

	/* Try to find the service for which to dump destinations */
	if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack))
		goto out_err;


	svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
	/* covers both a parse error (ERR_PTR) and "no such service" (NULL) */
	if (IS_ERR_OR_NULL(svc))
		goto out_err;

	/* Dump the destinations */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if (++idx <= start)
			continue;
		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
			/* un-count the entry that did not fit */
			idx--;
			goto nla_put_failure;
		}
	}

	/* reached on success as well as on failure */
nla_put_failure:
	cb->args[0] = idx;

out_err:
	mutex_unlock(&__ip_vs_mutex);

	return skb->len;
}

/* Parse the nested IPVS_CMD_ATTR_DEST attribute @nla into @udest.
 * Address and port are always required; with @full_entry the forwarding
 * method, weight and both thresholds are required too.
 * Returns 0 or -EINVAL.
 */
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
				 struct nlattr *nla, bool full_entry)
{
	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
	struct nlattr *nla_addr, *nla_port;
	struct nlattr *nla_addr_family;

	/* Parse mandatory identifying destination fields first */
	if (nla == NULL ||
	    nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL))
		return -EINVAL;

	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
	nla_addr_family	=
attrs[IPVS_DEST_ATTR_ADDR_FAMILY]; 3376 3377 if (!(nla_addr && nla_port)) 3378 return -EINVAL; 3379 3380 memset(udest, 0, sizeof(*udest)); 3381 3382 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); 3383 udest->port = nla_get_be16(nla_port); 3384 3385 if (nla_addr_family) 3386 udest->af = nla_get_u16(nla_addr_family); 3387 else 3388 udest->af = 0; 3389 3390 /* If a full entry was requested, check for the additional fields */ 3391 if (full_entry) { 3392 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, 3393 *nla_l_thresh, *nla_tun_type, *nla_tun_port, 3394 *nla_tun_flags; 3395 3396 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; 3397 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; 3398 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; 3399 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; 3400 nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE]; 3401 nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT]; 3402 nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS]; 3403 3404 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) 3405 return -EINVAL; 3406 3407 udest->conn_flags = nla_get_u32(nla_fwd) 3408 & IP_VS_CONN_F_FWD_MASK; 3409 udest->weight = nla_get_u32(nla_weight); 3410 udest->u_threshold = nla_get_u32(nla_u_thresh); 3411 udest->l_threshold = nla_get_u32(nla_l_thresh); 3412 3413 if (nla_tun_type) 3414 udest->tun_type = nla_get_u8(nla_tun_type); 3415 3416 if (nla_tun_port) 3417 udest->tun_port = nla_get_be16(nla_tun_port); 3418 3419 if (nla_tun_flags) 3420 udest->tun_flags = nla_get_u16(nla_tun_flags); 3421 } 3422 3423 return 0; 3424 } 3425 3426 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, 3427 struct ipvs_sync_daemon_cfg *c) 3428 { 3429 struct nlattr *nl_daemon; 3430 3431 nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON); 3432 if (!nl_daemon) 3433 return -EMSGSIZE; 3434 3435 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || 3436 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) || 3437 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, 
c->syncid) ||
	    nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
	    nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
	    nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
		goto nla_put_failure;
	/* Exactly one multicast group attribute is emitted, chosen by
	 * mcast_af.  Note the dangling "else" below: with IPv6 support the
	 * AF_INET test is the else-branch of the AF_INET6 test, without it
	 * the AF_INET test stands alone.
	 */
#ifdef CONFIG_IP_VS_IPV6
	if (c->mcast_af == AF_INET6) {
		if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
				     &c->mcast_group.in6))
			goto nla_put_failure;
	} else
#endif
	if (c->mcast_af == AF_INET &&
	    nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
			    c->mcast_group.ip))
		goto nla_put_failure;
	nla_nest_end(skb, nl_daemon);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_daemon);
	return -EMSGSIZE;
}

/* Add one IPVS_CMD_NEW_DAEMON multipart message for the daemon in @state
 * with configuration @c to the dump @skb.  Returns 0 or -EMSGSIZE.
 */
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
				  struct ipvs_sync_daemon_cfg *c,
				  struct netlink_callback *cb)
{
	void *hdr;
	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &ip_vs_genl_family, NLM_F_MULTI,
			  IPVS_CMD_NEW_DAEMON);
	if (!hdr)
		return -EMSGSIZE;

	if (ip_vs_genl_fill_daemon(skb, state, c))
		goto nla_put_failure;

	genlmsg_end(skb, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

/* Netlink dump callback for the sync daemons.  cb->args[0] / cb->args[1]
 * are set once the master / backup daemon has been dumped, so each is
 * emitted at most once across invocations.
 */
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct netns_ipvs *ipvs = net_ipvs(net);

	mutex_lock(&ipvs->sync_mutex);
	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
					   &ipvs->mcfg, cb) < 0)
			goto nla_put_failure;

		cb->args[0] = 1;
	}

	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
					   &ipvs->bcfg, cb) < 0)
			goto nla_put_failure;

		cb->args[1] = 1;
	}

nla_put_failure: 3508 mutex_unlock(&ipvs->sync_mutex); 3509 3510 return skb->len; 3511 } 3512 3513 static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) 3514 { 3515 struct ipvs_sync_daemon_cfg c; 3516 struct nlattr *a; 3517 int ret; 3518 3519 memset(&c, 0, sizeof(c)); 3520 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3521 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3522 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3523 return -EINVAL; 3524 strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3525 sizeof(c.mcast_ifn)); 3526 c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]); 3527 3528 a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN]; 3529 if (a) 3530 c.sync_maxlen = nla_get_u16(a); 3531 3532 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP]; 3533 if (a) { 3534 c.mcast_af = AF_INET; 3535 c.mcast_group.ip = nla_get_in_addr(a); 3536 if (!ipv4_is_multicast(c.mcast_group.ip)) 3537 return -EINVAL; 3538 } else { 3539 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6]; 3540 if (a) { 3541 #ifdef CONFIG_IP_VS_IPV6 3542 int addr_type; 3543 3544 c.mcast_af = AF_INET6; 3545 c.mcast_group.in6 = nla_get_in6_addr(a); 3546 addr_type = ipv6_addr_type(&c.mcast_group.in6); 3547 if (!(addr_type & IPV6_ADDR_MULTICAST)) 3548 return -EINVAL; 3549 #else 3550 return -EAFNOSUPPORT; 3551 #endif 3552 } 3553 } 3554 3555 a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT]; 3556 if (a) 3557 c.mcast_port = nla_get_u16(a); 3558 3559 a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL]; 3560 if (a) 3561 c.mcast_ttl = nla_get_u8(a); 3562 3563 /* The synchronization protocol is incompatible with mixed family 3564 * services 3565 */ 3566 if (ipvs->mixed_address_family_dests > 0) 3567 return -EINVAL; 3568 3569 ret = start_sync_thread(ipvs, &c, 3570 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3571 return ret; 3572 } 3573 3574 static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) 3575 { 3576 int ret; 3577 3578 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3579 return -EINVAL; 3580 3581 ret = stop_sync_thread(ipvs, 3582 
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3583 return ret; 3584 } 3585 3586 static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs) 3587 { 3588 struct ip_vs_timeout_user t; 3589 3590 __ip_vs_get_timeouts(ipvs, &t); 3591 3592 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3593 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 3594 3595 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) 3596 t.tcp_fin_timeout = 3597 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); 3598 3599 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3600 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3601 3602 return ip_vs_set_timeout(ipvs, &t); 3603 } 3604 3605 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) 3606 { 3607 int ret = -EINVAL, cmd; 3608 struct net *net = sock_net(skb->sk); 3609 struct netns_ipvs *ipvs = net_ipvs(net); 3610 3611 cmd = info->genlhdr->cmd; 3612 3613 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { 3614 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; 3615 3616 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || 3617 nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack)) 3618 goto out; 3619 3620 if (cmd == IPVS_CMD_NEW_DAEMON) 3621 ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs); 3622 else 3623 ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs); 3624 } 3625 3626 out: 3627 return ret; 3628 } 3629 3630 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 3631 { 3632 bool need_full_svc = false, need_full_dest = false; 3633 struct ip_vs_service *svc = NULL; 3634 struct ip_vs_service_user_kern usvc; 3635 struct ip_vs_dest_user_kern udest; 3636 int ret = 0, cmd; 3637 struct net *net = sock_net(skb->sk); 3638 struct netns_ipvs *ipvs = net_ipvs(net); 3639 3640 cmd = info->genlhdr->cmd; 3641 3642 mutex_lock(&__ip_vs_mutex); 3643 3644 if (cmd == IPVS_CMD_FLUSH) { 3645 ret = ip_vs_flush(ipvs, false); 3646 goto 
out; 3647 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3648 ret = ip_vs_genl_set_config(ipvs, info->attrs); 3649 goto out; 3650 } else if (cmd == IPVS_CMD_ZERO && 3651 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3652 ret = ip_vs_zero_all(ipvs); 3653 goto out; 3654 } 3655 3656 /* All following commands require a service argument, so check if we 3657 * received a valid one. We need a full service specification when 3658 * adding / editing a service. Only identifying members otherwise. */ 3659 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3660 need_full_svc = true; 3661 3662 ret = ip_vs_genl_parse_service(ipvs, &usvc, 3663 info->attrs[IPVS_CMD_ATTR_SERVICE], 3664 need_full_svc, &svc); 3665 if (ret) 3666 goto out; 3667 3668 /* Unless we're adding a new service, the service must already exist */ 3669 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3670 ret = -ESRCH; 3671 goto out; 3672 } 3673 3674 /* Destination commands require a valid destination argument. For 3675 * adding / editing a destination, we need a full destination 3676 * specification. */ 3677 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || 3678 cmd == IPVS_CMD_DEL_DEST) { 3679 if (cmd != IPVS_CMD_DEL_DEST) 3680 need_full_dest = true; 3681 3682 ret = ip_vs_genl_parse_dest(&udest, 3683 info->attrs[IPVS_CMD_ATTR_DEST], 3684 need_full_dest); 3685 if (ret) 3686 goto out; 3687 3688 /* Old protocols did not allow the user to specify address 3689 * family, so we set it to zero instead. We also didn't 3690 * allow heterogeneous pools in the old code, so it's safe 3691 * to assume that this will have the same address family as 3692 * the service. 
3693 */ 3694 if (udest.af == 0) 3695 udest.af = svc->af; 3696 3697 if (!ip_vs_is_af_valid(udest.af)) { 3698 ret = -EAFNOSUPPORT; 3699 goto out; 3700 } 3701 3702 if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) { 3703 /* The synchronization protocol is incompatible 3704 * with mixed family services 3705 */ 3706 if (ipvs->sync_state) { 3707 ret = -EINVAL; 3708 goto out; 3709 } 3710 3711 /* Which connection types do we support? */ 3712 switch (udest.conn_flags) { 3713 case IP_VS_CONN_F_TUNNEL: 3714 /* We are able to forward this */ 3715 break; 3716 default: 3717 ret = -EINVAL; 3718 goto out; 3719 } 3720 } 3721 } 3722 3723 switch (cmd) { 3724 case IPVS_CMD_NEW_SERVICE: 3725 if (svc == NULL) 3726 ret = ip_vs_add_service(ipvs, &usvc, &svc); 3727 else 3728 ret = -EEXIST; 3729 break; 3730 case IPVS_CMD_SET_SERVICE: 3731 ret = ip_vs_edit_service(svc, &usvc); 3732 break; 3733 case IPVS_CMD_DEL_SERVICE: 3734 ret = ip_vs_del_service(svc); 3735 /* do not use svc, it can be freed */ 3736 break; 3737 case IPVS_CMD_NEW_DEST: 3738 ret = ip_vs_add_dest(svc, &udest); 3739 break; 3740 case IPVS_CMD_SET_DEST: 3741 ret = ip_vs_edit_dest(svc, &udest); 3742 break; 3743 case IPVS_CMD_DEL_DEST: 3744 ret = ip_vs_del_dest(svc, &udest); 3745 break; 3746 case IPVS_CMD_ZERO: 3747 ret = ip_vs_zero_service(svc); 3748 break; 3749 default: 3750 ret = -EINVAL; 3751 } 3752 3753 out: 3754 mutex_unlock(&__ip_vs_mutex); 3755 3756 return ret; 3757 } 3758 3759 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) 3760 { 3761 struct sk_buff *msg; 3762 void *reply; 3763 int ret, cmd, reply_cmd; 3764 struct net *net = sock_net(skb->sk); 3765 struct netns_ipvs *ipvs = net_ipvs(net); 3766 3767 cmd = info->genlhdr->cmd; 3768 3769 if (cmd == IPVS_CMD_GET_SERVICE) 3770 reply_cmd = IPVS_CMD_NEW_SERVICE; 3771 else if (cmd == IPVS_CMD_GET_INFO) 3772 reply_cmd = IPVS_CMD_SET_INFO; 3773 else if (cmd == IPVS_CMD_GET_CONFIG) 3774 reply_cmd = IPVS_CMD_SET_CONFIG; 3775 else { 3776 pr_err("unknown 
/*
 * Generic Netlink operations of the IPVS family.
 *
 * Every entry carries GENL_ADMIN_PERM and sets both
 * GENL_DONT_VALIDATE_STRICT and GENL_DONT_VALIDATE_DUMP.
 *
 * Routing visible in this table:
 *   - service/destination changes, SET_CONFIG, ZERO and FLUSH
 *                                   -> ip_vs_genl_set_cmd
 *   - NEW_DAEMON / DEL_DAEMON       -> ip_vs_genl_set_daemon
 *   - GET_SERVICE / GET_CONFIG / GET_INFO (single reply)
 *                                   -> ip_vs_genl_get_cmd
 *   - GET_SERVICE / GET_DEST / GET_DAEMON dumps
 *                                   -> the ip_vs_genl_dump_* callbacks
 */
static const struct genl_ops ip_vs_genl_ops[] = {
	/* Virtual services */
	{
		.cmd	= IPVS_CMD_NEW_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
	/* The only command offering both a single reply and a dump */
	{
		.cmd	= IPVS_CMD_GET_SERVICE,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
		.dumpit	= ip_vs_genl_dump_services,
	},
	/* Destinations (real servers) */
	{
		.cmd	= IPVS_CMD_NEW_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
	/* Dump only, no ->doit */
	{
		.cmd	= IPVS_CMD_GET_DEST,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_dests,
	},
	/* Sync daemons */
	{
		.cmd	= IPVS_CMD_NEW_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_daemon,
	},
	{
		.cmd	= IPVS_CMD_DEL_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_daemon,
	},
	/* Dump only, no ->doit */
	{
		.cmd	= IPVS_CMD_GET_DAEMON,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_daemons,
	},
	/* Timeout configuration and general information */
	{
		.cmd	= IPVS_CMD_SET_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_CONFIG,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_INFO,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	/* Zeroing counters and flushing */
	{
		.cmd	= IPVS_CMD_ZERO,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_FLUSH,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
};
/*
 * Per-netns sysctl setup (CONFIG_SYSCTL variant).
 *
 * Initializes the defense-related locks, selects the sysctl table for this
 * netns, points each table entry at the matching field of @ipvs, registers
 * the table under "net/ipv4/vs", starts the estimator for the total stats
 * and schedules the periodic defense work.
 *
 * Returns 0 on success, -ENOMEM if the table copy cannot be allocated or
 * the sysctl registration fails.
 */
static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
	struct net *net = ipvs->net;
	int idx;
	struct ctl_table *tbl;

	atomic_set(&ipvs->dropentry, 0);
	spin_lock_init(&ipvs->dropentry_lock);
	spin_lock_init(&ipvs->droppacket_lock);
	spin_lock_init(&ipvs->securetcp_lock);

	/* init_net uses vs_vars directly; every other netns gets its own
	 * copy so that the .data pointers set below are private to it.
	 */
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;

		/* Don't export sysctls to unprivileged users */
		/* NOTE(review): presumably a NULL procname in the first entry
		 * makes the table look empty to the sysctl core - confirm.
		 */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	} else
		tbl = vs_vars;
	/* Initialize sysctl defaults */
	/* Entries handled by proc_do_defense_mode get the netns state via
	 * extra2.
	 */
	for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
		if (tbl[idx].proc_handler == proc_do_defense_mode)
			tbl[idx].extra2 = ipvs;
	}
	/* NOTE(review): the entries are wired up purely by position from here
	 * on, so this sequence (including the CONFIG_IP_VS_NFCT entry)
	 * presumably has to mirror the order of vs_vars[], which is defined
	 * outside this block - verify both places when adding a sysctl.
	 * Fields without an explicit default below keep whatever value @ipvs
	 * already holds.
	 */
	idx = 0;
	ipvs->sysctl_amemthresh = 1024;
	tbl[idx++].data = &ipvs->sysctl_amemthresh;
	ipvs->sysctl_am_droprate = 10;
	tbl[idx++].data = &ipvs->sysctl_am_droprate;
	tbl[idx++].data = &ipvs->sysctl_drop_entry;
	tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
	tbl[idx++].data = &ipvs->sysctl_conntrack;
#endif
	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
	ipvs->sysctl_snat_reroute = 1;
	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
	ipvs->sysctl_sync_ver = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ver;
	ipvs->sysctl_sync_ports = 1;
	tbl[idx++].data = &ipvs->sysctl_sync_ports;
	tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
	/* Default queue limit is derived from the free buffer pages at init */
	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
	ipvs->sysctl_sync_sock_size = 0;
	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
	tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
	tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
	/* sync_threshold is a two-element array: the same entry gets both
	 * its data pointer and its maxlen before idx advances.
	 */
	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
	tbl[idx].data = &ipvs->sysctl_sync_threshold;
	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
	/* The default retry count is clamped to the range 0..3 */
	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
	tbl[idx++].data = &ipvs->sysctl_sync_retries;
	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
	ipvs->sysctl_pmtu_disc = 1;
	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
	tbl[idx++].data = &ipvs->sysctl_backup_only;
	ipvs->sysctl_conn_reuse_mode = 1;
	tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
	tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
	tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;

	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
	if (ipvs->sysctl_hdr == NULL) {
		/* Only the kmemdup() copy is ours to free, never vs_vars */
		if (!net_eq(net, &init_net))
			kfree(tbl);
		return -ENOMEM;
	}
	ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
	/* Remember the table so the cleanup path can free a private copy */
	ipvs->sysctl_tbl = tbl;
	/* Schedule defense work */
	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);

	return 0;
}
ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { } 4088 4089 #endif 4090 4091 static struct notifier_block ip_vs_dst_notifier = { 4092 .notifier_call = ip_vs_dst_event, 4093 #ifdef CONFIG_IP_VS_IPV6 4094 .priority = ADDRCONF_NOTIFY_PRIORITY + 5, 4095 #endif 4096 }; 4097 4098 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) 4099 { 4100 int i, idx; 4101 4102 /* Initialize rs_table */ 4103 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 4104 INIT_HLIST_HEAD(&ipvs->rs_table[idx]); 4105 4106 INIT_LIST_HEAD(&ipvs->dest_trash); 4107 spin_lock_init(&ipvs->dest_trash_lock); 4108 timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0); 4109 atomic_set(&ipvs->ftpsvc_counter, 0); 4110 atomic_set(&ipvs->nullsvc_counter, 0); 4111 atomic_set(&ipvs->conn_out_counter, 0); 4112 4113 /* procfs stats */ 4114 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 4115 if (!ipvs->tot_stats.cpustats) 4116 return -ENOMEM; 4117 4118 for_each_possible_cpu(i) { 4119 struct ip_vs_cpu_stats *ipvs_tot_stats; 4120 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); 4121 u64_stats_init(&ipvs_tot_stats->syncp); 4122 } 4123 4124 spin_lock_init(&ipvs->tot_stats.lock); 4125 4126 proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops, 4127 sizeof(struct ip_vs_iter)); 4128 proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, 4129 ip_vs_stats_show, NULL); 4130 proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net, 4131 ip_vs_stats_percpu_show, NULL); 4132 4133 if (ip_vs_control_net_init_sysctl(ipvs)) 4134 goto err; 4135 4136 return 0; 4137 4138 err: 4139 free_percpu(ipvs->tot_stats.cpustats); 4140 return -ENOMEM; 4141 } 4142 4143 void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) 4144 { 4145 ip_vs_trash_cleanup(ipvs); 4146 ip_vs_control_net_cleanup_sysctl(ipvs); 4147 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); 4148 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); 4149 
remove_proc_entry("ip_vs", ipvs->net->proc_net); 4150 free_percpu(ipvs->tot_stats.cpustats); 4151 } 4152 4153 int __init ip_vs_register_nl_ioctl(void) 4154 { 4155 int ret; 4156 4157 ret = nf_register_sockopt(&ip_vs_sockopts); 4158 if (ret) { 4159 pr_err("cannot register sockopt.\n"); 4160 goto err_sock; 4161 } 4162 4163 ret = ip_vs_genl_register(); 4164 if (ret) { 4165 pr_err("cannot register Generic Netlink interface.\n"); 4166 goto err_genl; 4167 } 4168 return 0; 4169 4170 err_genl: 4171 nf_unregister_sockopt(&ip_vs_sockopts); 4172 err_sock: 4173 return ret; 4174 } 4175 4176 void ip_vs_unregister_nl_ioctl(void) 4177 { 4178 ip_vs_genl_unregister(); 4179 nf_unregister_sockopt(&ip_vs_sockopts); 4180 } 4181 4182 int __init ip_vs_control_init(void) 4183 { 4184 int idx; 4185 int ret; 4186 4187 EnterFunction(2); 4188 4189 /* Initialize svc_table, ip_vs_svc_fwm_table */ 4190 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 4191 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); 4192 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); 4193 } 4194 4195 smp_wmb(); /* Do we really need it now ? */ 4196 4197 ret = register_netdevice_notifier(&ip_vs_dst_notifier); 4198 if (ret < 0) 4199 return ret; 4200 4201 LeaveFunction(2); 4202 return 0; 4203 } 4204 4205 4206 void ip_vs_control_cleanup(void) 4207 { 4208 EnterFunction(2); 4209 unregister_netdevice_notifier(&ip_vs_dst_notifier); 4210 LeaveFunction(2); 4211 } 4212