/*
 * IPVS:	Locality-Based Least-Connection with Replication scheduler
 *
 * Authors:	Wensong Zhang <wensong@gnuchina.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Changes:
 *	Julian Anastasov	:	Added the missing (dest->weight>0)
 *					condition in the ip_vs_dest_set_max.
 *
 */

/*
 * The lblc/r algorithm is as follows (pseudo code):
 *
 *	if serverSet[dest_ip] is null then
 *		n, serverSet[dest_ip] <- {weighted least-conn node};
 *	else
 *		n <- {least-conn (alive) node in serverSet[dest_ip]};
 *		if (n is null) OR
 *		   (n.conns>n.weight AND
 *		    there is a node m with m.conns<m.weight/2) then
 *			n <- {weighted least-conn node};
 *			add n to serverSet[dest_ip];
 *		if |serverSet[dest_ip]| > 1 AND
 *		    now - serverSet[dest_ip].lastMod > T then
 *			m <- {most conn node in serverSet[dest_ip]};
 *			remove m from serverSet[dest_ip];
 *	if serverSet[dest_ip] changed then
 *		serverSet[dest_ip].lastMod <- now;
 *
 *	return n;
 *
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/slab.h>

/* for sysctl */
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>

#include <net/ip_vs.h>


/*
 * Used for garbage collection of stale IPVS lblcr entries when the
 * table is full.
 */
#define CHECK_EXPIRE_INTERVAL	(60*HZ)
#define ENTRY_TIMEOUT		(6*60*HZ)

#define DEFAULT_EXPIRATION	(24*60*60*HZ)

/*
 * Used for the full expiration check.
 * When there has been no partial expiration check (garbage collection)
 * for half an hour, do a full expiration check to collect stale
 * entries that haven't been touched for a day.
 */
#define COUNT_FOR_FULL_EXPIRATION	30
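/*
 * Editorial note on the defaults above: the periodic timer fires every
 * CHECK_EXPIRE_INTERVAL (60*HZ = one minute).  Every
 * COUNT_FOR_FULL_EXPIRATION = 30 firings, i.e. roughly every half hour,
 * a full sweep frees entries idle for longer than the lblcr_expiration
 * sysctl (DEFAULT_EXPIRATION = 24 hours).  Between full sweeps, partial
 * collection runs only while the table exceeds its maximum size, and
 * then drops entries idle for longer than ENTRY_TIMEOUT (6 minutes).
 */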
/*
 * for IPVS lblcr entry hash table
 */
#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
#define CONFIG_IP_VS_LBLCR_TAB_BITS	10
#endif
#define IP_VS_LBLCR_TAB_BITS	CONFIG_IP_VS_LBLCR_TAB_BITS
#define IP_VS_LBLCR_TAB_SIZE	(1 << IP_VS_LBLCR_TAB_BITS)
#define IP_VS_LBLCR_TAB_MASK	(IP_VS_LBLCR_TAB_SIZE - 1)


/*
 * IPVS destination set structure and operations
 */
struct ip_vs_dest_set_elem {
	struct list_head	list;		/* list link */
	struct ip_vs_dest	*dest;		/* destination server */
	struct rcu_head		rcu_head;
};

struct ip_vs_dest_set {
	atomic_t		size;		/* set size */
	unsigned long		lastmod;	/* last modified time */
	struct list_head	list;		/* destination list */
};


static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
				  struct ip_vs_dest *dest, bool check)
{
	struct ip_vs_dest_set_elem *e;

	if (check) {
		list_for_each_entry(e, &set->list, list) {
			if (e->dest == dest)
				return;
		}
	}

	e = kmalloc(sizeof(*e), GFP_ATOMIC);
	if (e == NULL)
		return;

	ip_vs_dest_hold(dest);
	e->dest = dest;

	list_add_rcu(&e->list, &set->list);
	atomic_inc(&set->size);

	set->lastmod = jiffies;
}

static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
{
	struct ip_vs_dest_set_elem *e;

	e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
	ip_vs_dest_put_and_free(e->dest);
	kfree(e);
}

static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
	struct ip_vs_dest_set_elem *e;

	list_for_each_entry(e, &set->list, list) {
		if (e->dest == dest) {
			/* HIT */
			atomic_dec(&set->size);
			set->lastmod = jiffies;
			list_del_rcu(&e->list);
			call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
			break;
		}
	}
}

static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
{
	struct ip_vs_dest_set_elem *e, *ep;

	list_for_each_entry_safe(e, ep, &set->list, list) {
		list_del_rcu(&e->list);
		call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
	}
}

/* get weighted least-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
	register struct ip_vs_dest_set_elem *e;
	struct ip_vs_dest *dest, *least;
	int loh, doh;

	/* select the first destination server whose weight > 0 */
	list_for_each_entry_rcu(e, &set->list, list) {
		least = e->dest;
		if (least->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		if ((atomic_read(&least->weight) > 0)
		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
			loh = ip_vs_dest_conn_overhead(least);
			goto nextstage;
		}
	}
	return NULL;

	/* find the destination with the weighted least load */
  nextstage:
	list_for_each_entry_continue_rcu(e, &set->list, list) {
		dest = e->dest;
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		doh = ip_vs_dest_conn_overhead(dest);
		if (((__s64)loh * atomic_read(&dest->weight) >
		     (__s64)doh * atomic_read(&least->weight))
		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
			least = dest;
			loh = doh;
		}
	}

	IP_VS_DBG_BUF(6, "%s(): server %s:%d "
		      "activeconns %d refcnt %d weight %d overhead %d\n",
		      __func__,
		      IP_VS_DBG_ADDR(least->af, &least->addr),
		      ntohs(least->port),
		      atomic_read(&least->activeconns),
		      atomic_read(&least->refcnt),
		      atomic_read(&least->weight), loh);
	return least;
}
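/*
 * Worked example for the weighted comparison in ip_vs_dest_set_min()
 * (hypothetical numbers): if the current pick has overhead loh = 50 at
 * weight 2 and a candidate has doh = 90 at weight 4, the test
 * loh*dw > doh*lw compares 50*4 = 200 with 90*2 = 180; since 200 > 180
 * the candidate wins, matching 90/4 = 22.5 < 50/2 = 25 without using
 * floating point, which is unavailable in kernel mode.
 */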
/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
	register struct ip_vs_dest_set_elem *e;
	struct ip_vs_dest *dest, *most;
	int moh, doh;

	if (set == NULL)
		return NULL;

	/* select the first destination server whose weight > 0 */
	list_for_each_entry(e, &set->list, list) {
		most = e->dest;
		if (atomic_read(&most->weight) > 0) {
			moh = ip_vs_dest_conn_overhead(most);
			goto nextstage;
		}
	}
	return NULL;

	/* find the destination with the weighted most load */
  nextstage:
	list_for_each_entry_continue(e, &set->list, list) {
		dest = e->dest;
		doh = ip_vs_dest_conn_overhead(dest);
		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
		if (((__s64)moh * atomic_read(&dest->weight) <
		     (__s64)doh * atomic_read(&most->weight))
		    && (atomic_read(&dest->weight) > 0)) {
			most = dest;
			moh = doh;
		}
	}

	IP_VS_DBG_BUF(6, "%s(): server %s:%d "
		      "activeconns %d refcnt %d weight %d overhead %d\n",
		      __func__,
		      IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
		      atomic_read(&most->activeconns),
		      atomic_read(&most->refcnt),
		      atomic_read(&most->weight), moh);
	return most;
}


/*
 * IPVS lblcr entry represents an association between destination
 * IP address and its destination server set
 */
struct ip_vs_lblcr_entry {
	struct hlist_node	list;
	int			af;		/* address family */
	union nf_inet_addr	addr;		/* destination IP address */
	struct ip_vs_dest_set	set;		/* destination server set */
	unsigned long		lastuse;	/* last used time */
	struct rcu_head		rcu_head;
};


/*
 * IPVS lblcr hash table
 */
struct ip_vs_lblcr_table {
	struct rcu_head		rcu_head;
	struct hlist_head	bucket[IP_VS_LBLCR_TAB_SIZE];	/* hash bucket */
	atomic_t		entries;	/* number of entries */
	int			max_size;	/* maximum size of entries */
	struct timer_list	periodic_timer;	/* collect stale entries */
	int			rover;		/* rover for expire check */
	int			counter;	/* counter for no expire */
	bool			dead;
};


#ifdef CONFIG_SYSCTL
/*
 * IPVS LBLCR sysctl table
 */

static struct ctl_table vs_vars_table[] = {
	{
		.procname	= "lblcr_expiration",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{ }
};
#endif

static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
	hlist_del_rcu(&en->list);
	ip_vs_dest_set_eraseall(&en->set);
	kfree_rcu(en, rcu_head);
}


/*
 * Returns hash value for IPVS LBLCR entry
 */
static inline unsigned int
ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
{
	__be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		addr_fold = addr->ip6[0]^addr->ip6[1]^
			    addr->ip6[2]^addr->ip6[3];
#endif
	return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
}
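/*
 * Editorial note on the hash above: 2654435761 is Knuth's
 * multiplicative-hashing constant, close to 2^32 divided by the golden
 * ratio.  Multiplying the host-order address by it mixes the high and
 * low address bits before the mask keeps the low IP_VS_LBLCR_TAB_BITS
 * bits, so addresses that differ only in the last octet still scatter
 * across the IP_VS_LBLCR_TAB_SIZE (default 1024) buckets.  For IPv6
 * the four 32-bit words are first folded together with XOR.
 */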
/*
 * Hash an entry in the ip_vs_lblcr_table.
 */
static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
	unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);

	hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
	atomic_inc(&tbl->entries);
}


/* Get ip_vs_lblcr_entry associated with supplied parameters. */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
		const union nf_inet_addr *addr)
{
	unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
	struct ip_vs_lblcr_entry *en;

	hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
		if (ip_vs_addr_equal(af, &en->addr, addr))
			return en;

	return NULL;
}


/*
 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
 * IP address to a server. Called under spin lock.
 */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
		struct ip_vs_dest *dest)
{
	struct ip_vs_lblcr_entry *en;

	en = ip_vs_lblcr_get(dest->af, tbl, daddr);
	if (!en) {
		en = kmalloc(sizeof(*en), GFP_ATOMIC);
		if (!en)
			return NULL;

		en->af = dest->af;
		ip_vs_addr_copy(dest->af, &en->addr, daddr);
		en->lastuse = jiffies;

		/* initialize its dest set */
		atomic_set(&(en->set.size), 0);
		INIT_LIST_HEAD(&en->set.list);

		ip_vs_dest_set_insert(&en->set, dest, false);

		ip_vs_lblcr_hash(tbl, en);
		return en;
	}

	ip_vs_dest_set_insert(&en->set, dest, true);

	return en;
}


/*
 * Flush all the entries of the specified table.
 */
static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
{
	struct ip_vs_lblcr_table *tbl = svc->sched_data;
	int i;
	struct ip_vs_lblcr_entry *en;
	struct hlist_node *next;

	spin_lock_bh(&svc->sched_lock);
	tbl->dead = 1;
	for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
			ip_vs_lblcr_free(en);
		}
	}
	spin_unlock_bh(&svc->sched_lock);
}

static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
	struct netns_ipvs *ipvs = net_ipvs(svc->net);
	return ipvs->sysctl_lblcr_expiration;
#else
	return DEFAULT_EXPIRATION;
#endif
}

static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
	struct ip_vs_lblcr_table *tbl = svc->sched_data;
	unsigned long now = jiffies;
	int i, j;
	struct ip_vs_lblcr_entry *en;
	struct hlist_node *next;

	for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

		spin_lock(&svc->sched_lock);
		hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
			if (time_after(en->lastuse +
				       sysctl_lblcr_expiration(svc), now))
				continue;

			ip_vs_lblcr_free(en);
			atomic_dec(&tbl->entries);
		}
		spin_unlock(&svc->sched_lock);
	}
	tbl->rover = j;
}
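/*
 * Example of the full check above (hypothetical timings): with the
 * default lblcr_expiration of 24 hours, an entry last used 25 hours ago
 * fails the time_after() test and is freed, while one used ten minutes
 * ago survives.  The walk starts at tbl->rover and takes
 * svc->sched_lock one bucket at a time, so a full sweep of a large
 * table never holds the scheduler lock for the whole scan.
 */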
/*
 * Periodic timer handler for the IPVS lblcr table.
 * It is used to collect stale entries when the number of entries
 * exceeds the maximum size of the table.
 *
 * Fixme: we probably need a more complicated algorithm to collect
 *	  entries that have not been used for a long time even
 *	  if the number of entries doesn't exceed the maximum size
 *	  of the table.
 * The full expiration check is for this purpose now.
 */
static void ip_vs_lblcr_check_expire(unsigned long data)
{
	struct ip_vs_service *svc = (struct ip_vs_service *) data;
	struct ip_vs_lblcr_table *tbl = svc->sched_data;
	unsigned long now = jiffies;
	int goal;
	int i, j;
	struct ip_vs_lblcr_entry *en;
	struct hlist_node *next;

	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
		/* do full expiration check */
		ip_vs_lblcr_full_check(svc);
		tbl->counter = 1;
		goto out;
	}

	if (atomic_read(&tbl->entries) <= tbl->max_size) {
		tbl->counter++;
		goto out;
	}

	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
	if (goal > tbl->max_size/2)
		goal = tbl->max_size/2;

	for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

		spin_lock(&svc->sched_lock);
		hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
			if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
				continue;

			ip_vs_lblcr_free(en);
			atomic_dec(&tbl->entries);
			goal--;
		}
		spin_unlock(&svc->sched_lock);
		if (goal <= 0)
			break;
	}
	tbl->rover = j;

  out:
	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
}
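/*
 * Worked example for the goal computation above (hypothetical numbers):
 * with the default table, max_size = IP_VS_LBLCR_TAB_SIZE * 16 = 16384.
 * If the table has grown to 20000 entries, then
 * goal = (20000 - 16384) * 4/3 = 4821, below the cap of
 * max_size/2 = 8192, so this partial sweep tries to free about 4821
 * entries that have been idle for longer than ENTRY_TIMEOUT.
 */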
static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
{
	int i;
	struct ip_vs_lblcr_table *tbl;

	/*
	 *    Allocate the ip_vs_lblcr_table for this service
	 */
	tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
	if (tbl == NULL)
		return -ENOMEM;

	svc->sched_data = tbl;
	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
		  "current service\n", sizeof(*tbl));

	/*
	 *    Initialize the hash buckets
	 */
	for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
		INIT_HLIST_HEAD(&tbl->bucket[i]);
	}
	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
	tbl->rover = 0;
	tbl->counter = 1;
	tbl->dead = 0;

	/*
	 *    Hook periodic timer for garbage collection
	 */
	setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
		    (unsigned long)svc);
	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);

	return 0;
}


static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_lblcr_table *tbl = svc->sched_data;

	/* remove periodic timer */
	del_timer_sync(&tbl->periodic_timer);

	/* got to clean up table entries here */
	ip_vs_lblcr_flush(svc);

	/* release the table itself */
	kfree_rcu(tbl, rcu_head);
	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
		  sizeof(*tbl));
}


static inline struct ip_vs_dest *
__ip_vs_lblcr_schedule(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest, *least;
	int loh, doh;

	/*
	 * We use the following formula to estimate the load:
	 *		  (dest overhead) / dest->weight
	 *
	 * Remember -- no floats in kernel mode!!!
	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
	 *		  h1/w1 > h2/w2
	 * if every weight is larger than zero.
	 *
	 * A server with weight=0 is quiesced and will not receive any
	 * new connection.
	 */
	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		if (atomic_read(&dest->weight) > 0) {
			least = dest;
			loh = ip_vs_dest_conn_overhead(least);
			goto nextstage;
		}
	}
	return NULL;

	/*
	 * Find the destination with the least load.
	 */
  nextstage:
	list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		doh = ip_vs_dest_conn_overhead(dest);
		if ((__s64)loh * atomic_read(&dest->weight) >
		    (__s64)doh * atomic_read(&least->weight)) {
			least = dest;
			loh = doh;
		}
	}

	IP_VS_DBG_BUF(6, "LBLCR: server %s:%d "
		      "activeconns %d refcnt %d weight %d overhead %d\n",
		      IP_VS_DBG_ADDR(least->af, &least->addr),
		      ntohs(least->port),
		      atomic_read(&least->activeconns),
		      atomic_read(&least->refcnt),
		      atomic_read(&least->weight), loh);

	return least;
}


/*
 * If this destination server is overloaded and there is a less loaded
 * server, then return true.
 */
static inline int
is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
		struct ip_vs_dest *d;

		list_for_each_entry_rcu(d, &svc->destinations, n_list) {
			if (atomic_read(&d->activeconns)*2
			    < atomic_read(&d->weight)) {
				return 1;
			}
		}
	}
	return 0;
}


/*
 * Locality-Based (weighted) Least-Connection with Replication scheduling
 */
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		     struct ip_vs_iphdr *iph)
{
	struct ip_vs_lblcr_table *tbl = svc->sched_data;
	struct ip_vs_dest *dest;
	struct ip_vs_lblcr_entry *en;

	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);

	/* First look in our cache */
	en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr);
	if (en) {
		en->lastuse = jiffies;

		/* Get the least loaded destination */
		dest = ip_vs_dest_set_min(&en->set);

		/* More than one destination + enough time passed by, cleanup */
		if (atomic_read(&en->set.size) > 1 &&
		    time_after(jiffies, en->set.lastmod +
				sysctl_lblcr_expiration(svc))) {
			spin_lock_bh(&svc->sched_lock);
			if (atomic_read(&en->set.size) > 1) {
				struct ip_vs_dest *m;

				m = ip_vs_dest_set_max(&en->set);
				if (m)
					ip_vs_dest_set_erase(&en->set, m);
			}
			spin_unlock_bh(&svc->sched_lock);
		}

		/* If the destination is not overloaded, use it */
		if (dest && !is_overloaded(dest, svc))
			goto out;

		/* The cache entry is invalid, time to schedule */
		dest = __ip_vs_lblcr_schedule(svc);
		if (!dest) {
			ip_vs_scheduler_err(svc, "no destination available");
			return NULL;
		}

		/* Update our cache entry */
		spin_lock_bh(&svc->sched_lock);
		if (!tbl->dead)
			ip_vs_dest_set_insert(&en->set, dest, true);
		spin_unlock_bh(&svc->sched_lock);
		goto out;
	}

	/* No cache entry, time to schedule */
	dest = __ip_vs_lblcr_schedule(svc);
	if (!dest) {
		IP_VS_DBG(1, "no destination available\n");
		return NULL;
	}

	/* If we fail to create a cache entry, we'll just use the valid dest */
	spin_lock_bh(&svc->sched_lock);
	if (!tbl->dead)
		ip_vs_lblcr_new(tbl, &iph->daddr, dest);
	spin_unlock_bh(&svc->sched_lock);

  out:
	IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
		      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));

	return dest;
}
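/*
 * Replication walk-through (hypothetical scenario): suppose the set for
 * a destination IP holds a single server with weight 3 that is carrying
 * 5 active connections.  is_overloaded() sees 5 > 3 and finds another
 * server with activeconns*2 < weight, so the scheduler falls through to
 * __ip_vs_lblcr_schedule(), picks the weighted least-connection server
 * service-wide and inserts it into the set -- replicating the mapping
 * onto a second real server, exactly as in the pseudo code at the top
 * of this file.
 */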
/*
 *      IPVS LBLCR Scheduler structure
 */
static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
{
	.name =			"lblcr",
	.refcnt =		ATOMIC_INIT(0),
	.module =		THIS_MODULE,
	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
	.init_service =		ip_vs_lblcr_init_svc,
	.done_service =		ip_vs_lblcr_done_svc,
	.schedule =		ip_vs_lblcr_schedule,
};

/*
 *  per netns init.
 */
#ifdef CONFIG_SYSCTL
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	if (!ipvs)
		return -ENOENT;

	if (!net_eq(net, &init_net)) {
		ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
						sizeof(vs_vars_table),
						GFP_KERNEL);
		if (ipvs->lblcr_ctl_table == NULL)
			return -ENOMEM;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			ipvs->lblcr_ctl_table[0].procname = NULL;
	} else
		ipvs->lblcr_ctl_table = vs_vars_table;
	ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
	ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;

	ipvs->lblcr_ctl_header =
		register_net_sysctl(net, "net/ipv4/vs", ipvs->lblcr_ctl_table);
	if (!ipvs->lblcr_ctl_header) {
		if (!net_eq(net, &init_net))
			kfree(ipvs->lblcr_ctl_table);
		return -ENOMEM;
	}

	return 0;
}

static void __net_exit __ip_vs_lblcr_exit(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	unregister_net_sysctl_table(ipvs->lblcr_ctl_header);

	if (!net_eq(net, &init_net))
		kfree(ipvs->lblcr_ctl_table);
}

#else

static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }

#endif

static struct pernet_operations ip_vs_lblcr_ops = {
	.init = __ip_vs_lblcr_init,
	.exit = __ip_vs_lblcr_exit,
};

static int __init ip_vs_lblcr_init(void)
{
	int ret;

	ret = register_pernet_subsys(&ip_vs_lblcr_ops);
	if (ret)
		return ret;

	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
	if (ret)
		unregister_pernet_subsys(&ip_vs_lblcr_ops);
	return ret;
}

static void __exit ip_vs_lblcr_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
	unregister_pernet_subsys(&ip_vs_lblcr_ops);
	rcu_barrier();
}


module_init(ip_vs_lblcr_init);
module_exit(ip_vs_lblcr_cleanup);
MODULE_LICENSE("GPL");
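/*
 * Usage sketch (editorial; assumes a standard ipvsadm installation):
 * the scheduler is selected per virtual service, e.g.
 *
 *	ipvsadm -A -t 192.168.0.1:80 -s lblcr
 *	ipvsadm -a -t 192.168.0.1:80 -r 10.0.0.1:80 -g
 *
 * and the set expiration registered above can be tuned in seconds via
 * /proc/sys/net/ipv4/vs/lblcr_expiration (converted to jiffies by
 * proc_dointvec_jiffies).
 */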