/*
 * IPVS:	Locality-Based Least-Connection with Replication scheduler
 *
 * Authors:	Wensong Zhang <wensong@gnuchina.org>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Changes:
 *	Julian Anastasov	:	Added the missing (dest->weight>0)
 *					condition in the ip_vs_dest_set_max.
 *
 */

/*
 * The lblc/r algorithm is as follows (pseudo code):
 *
 *	if serverSet[dest_ip] is null then
 *		n, serverSet[dest_ip] <- {weighted least-conn node};
 *	else
 *		n <- {least-conn (alive) node in serverSet[dest_ip]};
 *		if (n is null) OR
 *		   (n.conns>n.weight AND
 *		    there is a node m with m.conns<m.weight/2) then
 *			n <- {weighted least-conn node};
 *			add n to serverSet[dest_ip];
 *	if |serverSet[dest_ip]| > 1 AND
 *	    now - serverSet[dest_ip].lastMod > T then
 *		m <- {most conn node in serverSet[dest_ip]};
 *		remove m from serverSet[dest_ip];
 *	if serverSet[dest_ip] changed then
 *		serverSet[dest_ip].lastMod <- now;
 *
 *	return n;
 *
 */
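
/*
 * In the pseudo code above, T is the expiration period of a cached
 * server set; it corresponds to the lblcr_expiration sysctl handled
 * below (initialized to DEFAULT_EXPIRATION).
 */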

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/hash.h>

/* for sysctl */
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>

#include <net/ip_vs.h>


/*
 * Parameters for the garbage collection of stale IPVS lblcr entries,
 * run when the table is full.
 */
#define CHECK_EXPIRE_INTERVAL	(60*HZ)
#define ENTRY_TIMEOUT		(6*60*HZ)

#define DEFAULT_EXPIRATION	(24*60*60*HZ)

/*
 * Threshold for the full expiration check.
 * When there has been no partial expiration check (garbage collection)
 * for half an hour, do a full expiration check to collect stale
 * entries that haven't been touched for a day.
 */
#define COUNT_FOR_FULL_EXPIRATION	30

/*
 * for the IPVS lblcr entry hash table
 */
#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
#define CONFIG_IP_VS_LBLCR_TAB_BITS	10
#endif
#define IP_VS_LBLCR_TAB_BITS	CONFIG_IP_VS_LBLCR_TAB_BITS
#define IP_VS_LBLCR_TAB_SIZE	(1 << IP_VS_LBLCR_TAB_BITS)
#define IP_VS_LBLCR_TAB_MASK	(IP_VS_LBLCR_TAB_SIZE - 1)


/*
 * IPVS destination set structure and operations
 */
struct ip_vs_dest_set_elem {
	struct list_head	list;	/* list link */
	struct ip_vs_dest	*dest;	/* destination server */
	struct rcu_head		rcu_head;
};

struct ip_vs_dest_set {
	atomic_t		size;		/* set size */
	unsigned long		lastmod;	/* last modified time */
	struct list_head	list;		/* destination list */
};

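
/*
 * Add @dest to @set unless @check finds it there already.  Readers
 * traverse the set under RCU; writers are serialized by the service's
 * sched_lock, so plain list_add_rcu() is sufficient here.
 */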
static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
				  struct ip_vs_dest *dest, bool check)
{
	struct ip_vs_dest_set_elem *e;

	if (check) {
		list_for_each_entry(e, &set->list, list) {
			if (e->dest == dest)
				return;
		}
	}

	e = kmalloc(sizeof(*e), GFP_ATOMIC);
	if (e == NULL)
		return;

	ip_vs_dest_hold(dest);
	e->dest = dest;

	list_add_rcu(&e->list, &set->list);
	atomic_inc(&set->size);

	set->lastmod = jiffies;
}

static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
{
	struct ip_vs_dest_set_elem *e;

	e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
	ip_vs_dest_put_and_free(e->dest);
	kfree(e);
}

static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
	struct ip_vs_dest_set_elem *e;

	list_for_each_entry(e, &set->list, list) {
		if (e->dest == dest) {
			/* HIT */
			atomic_dec(&set->size);
			set->lastmod = jiffies;
			list_del_rcu(&e->list);
			call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
			break;
		}
	}
}

static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
{
	struct ip_vs_dest_set_elem *e, *ep;

	list_for_each_entry_safe(e, ep, &set->list, list) {
		list_del_rcu(&e->list);
		call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
	}
}

/* get weighted least-connection node in the destination set */
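/*
 * Loads are compared without division: least keeps its place over dest
 * iff loh/lweight <= doh/dweight, i.e. loh*dweight <= doh*lweight,
 * all weights being positive.  For example, overhead 50 at weight 2
 * (load 25) loses to overhead 30 at weight 3 (load 10), as 50*3 > 30*2.
 */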
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
	register struct ip_vs_dest_set_elem *e;
	struct ip_vs_dest *dest, *least;
	int loh, doh;

	/* select the first destination server, whose weight > 0 */
	list_for_each_entry_rcu(e, &set->list, list) {
		least = e->dest;
		if (least->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		if ((atomic_read(&least->weight) > 0)
		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
			loh = ip_vs_dest_conn_overhead(least);
			goto nextstage;
		}
	}
	return NULL;

	/* find the destination with the weighted least load */
nextstage:
	list_for_each_entry_continue_rcu(e, &set->list, list) {
		dest = e->dest;
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		doh = ip_vs_dest_conn_overhead(dest);
		if (((__s64)loh * atomic_read(&dest->weight) >
		     (__s64)doh * atomic_read(&least->weight))
		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
			least = dest;
			loh = doh;
		}
	}

	IP_VS_DBG_BUF(6, "%s(): server %s:%d "
		      "activeconns %d refcnt %d weight %d overhead %d\n",
		      __func__,
		      IP_VS_DBG_ADDR(least->af, &least->addr),
		      ntohs(least->port),
		      atomic_read(&least->activeconns),
		      refcount_read(&least->refcnt),
		      atomic_read(&least->weight), loh);
	return least;
}


/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
	register struct ip_vs_dest_set_elem *e;
	struct ip_vs_dest *dest, *most;
	int moh, doh;

	if (set == NULL)
		return NULL;

	/* select the first destination server, whose weight > 0 */
	list_for_each_entry(e, &set->list, list) {
		most = e->dest;
		if (atomic_read(&most->weight) > 0) {
			moh = ip_vs_dest_conn_overhead(most);
			goto nextstage;
		}
	}
	return NULL;

	/* find the destination with the weighted most load */
nextstage:
	list_for_each_entry_continue(e, &set->list, list) {
		dest = e->dest;
		doh = ip_vs_dest_conn_overhead(dest);
		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
		if (((__s64)moh * atomic_read(&dest->weight) <
		     (__s64)doh * atomic_read(&most->weight))
		    && (atomic_read(&dest->weight) > 0)) {
			most = dest;
			moh = doh;
		}
	}

	IP_VS_DBG_BUF(6, "%s(): server %s:%d "
		      "activeconns %d refcnt %d weight %d overhead %d\n",
		      __func__,
		      IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
		      atomic_read(&most->activeconns),
		      refcount_read(&most->refcnt),
		      atomic_read(&most->weight), moh);
	return most;
}


/*
 * IPVS lblcr entry represents an association between destination
 * IP address and its destination server set
 */
struct ip_vs_lblcr_entry {
	struct hlist_node	list;
	int			af;		/* address family */
	union nf_inet_addr	addr;		/* destination IP address */
	struct ip_vs_dest_set	set;		/* destination server set */
	unsigned long		lastuse;	/* last used time */
	struct rcu_head		rcu_head;
};


/*
 * IPVS lblcr hash table
 */
struct ip_vs_lblcr_table {
	struct rcu_head		rcu_head;
	struct hlist_head	bucket[IP_VS_LBLCR_TAB_SIZE];	/* hash bucket */
	atomic_t		entries;	/* number of entries */
	int			max_size;	/* maximum size of entries */
	struct timer_list	periodic_timer;	/* collect stale entries */
	struct ip_vs_service	*svc;		/* pointer back to service */
	int			rover;		/* rover for expire check */
	int			counter;	/* counter for no expire */
	bool			dead;
};


#ifdef CONFIG_SYSCTL
/*
 * IPVS LBLCR sysctl table
 */

static struct ctl_table vs_vars_table[] = {
	{
		.procname	= "lblcr_expiration",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{ }
};
#endif

static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
	hlist_del_rcu(&en->list);
	ip_vs_dest_set_eraseall(&en->set);
	kfree_rcu(en, rcu_head);
}


/*
 * Returns hash value for IPVS LBLCR entry
 */
static inline unsigned int
ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
{
	__be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		addr_fold = addr->ip6[0]^addr->ip6[1]^
			    addr->ip6[2]^addr->ip6[3];
#endif
	return hash_32(ntohl(addr_fold), IP_VS_LBLCR_TAB_BITS);
}


/*
 * Hash an entry in the ip_vs_lblcr_table.
 */
static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
	unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);

	hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
	atomic_inc(&tbl->entries);
}


/* Get ip_vs_lblcr_entry associated with supplied parameters. */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
		const union nf_inet_addr *addr)
{
	unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
	struct ip_vs_lblcr_entry *en;

	hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
		if (ip_vs_addr_equal(af, &en->addr, addr))
			return en;

	return NULL;
}


/*
 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
 * IP address to a server. Called under spin lock.
 */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
		u16 af, struct ip_vs_dest *dest)
{
	struct ip_vs_lblcr_entry *en;

	en = ip_vs_lblcr_get(af, tbl, daddr);
	if (!en) {
		en = kmalloc(sizeof(*en), GFP_ATOMIC);
		if (!en)
			return NULL;

		en->af = af;
		ip_vs_addr_copy(af, &en->addr, daddr);
		en->lastuse = jiffies;

		/* initialize its dest set */
		atomic_set(&(en->set.size), 0);
		INIT_LIST_HEAD(&en->set.list);

		ip_vs_dest_set_insert(&en->set, dest, false);

		ip_vs_lblcr_hash(tbl, en);
		return en;
	}

	ip_vs_dest_set_insert(&en->set, dest, true);

	return en;
}


/*
 * Flush all the entries of the specified table.
 */
static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
{
	struct ip_vs_lblcr_table *tbl = svc->sched_data;
	int i;
	struct ip_vs_lblcr_entry *en;
	struct hlist_node *next;

	spin_lock_bh(&svc->sched_lock);
	tbl->dead = true;
	for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
			ip_vs_lblcr_free(en);
		}
	}
	spin_unlock_bh(&svc->sched_lock);
}

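/*
 * Expiration period for an entry's destination set; tunable through
 * the lblcr_expiration sysctl when CONFIG_SYSCTL is enabled.
 */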
static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
	return svc->ipvs->sysctl_lblcr_expiration;
#else
	return DEFAULT_EXPIRATION;
#endif
}

static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
	struct ip_vs_lblcr_table *tbl = svc->sched_data;
	unsigned long now = jiffies;
	int i, j;
	struct ip_vs_lblcr_entry *en;
	struct hlist_node *next;

	for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

		spin_lock(&svc->sched_lock);
		hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
			if (time_after(en->lastuse +
				       sysctl_lblcr_expiration(svc), now))
				continue;

			ip_vs_lblcr_free(en);
			atomic_dec(&tbl->entries);
		}
		spin_unlock(&svc->sched_lock);
	}
	tbl->rover = j;
}


/*
 * Periodic timer handler for the IPVS lblcr table.
 * It is used to collect stale entries when the number of entries
 * exceeds the maximum size of the table.
 *
 * Fixme: we probably need a more complicated algorithm to collect
 *        entries that have not been used for a long time even
 *        if the number of entries doesn't exceed the maximum size
 *        of the table.
 * The full expiration check is for this purpose now.
 */
static void ip_vs_lblcr_check_expire(struct timer_list *t)
{
	struct ip_vs_lblcr_table *tbl = from_timer(tbl, t, periodic_timer);
	struct ip_vs_service *svc = tbl->svc;
	unsigned long now = jiffies;
	int goal;
	int i, j;
	struct ip_vs_lblcr_entry *en;
	struct hlist_node *next;

	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
		/* do full expiration check */
		ip_vs_lblcr_full_check(svc);
		tbl->counter = 1;
		goto out;
	}

	if (atomic_read(&tbl->entries) <= tbl->max_size) {
		tbl->counter++;
		goto out;
	}

	goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
	if (goal > tbl->max_size/2)
		goal = tbl->max_size/2;

	for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

		spin_lock(&svc->sched_lock);
		hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
			if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
				continue;

			ip_vs_lblcr_free(en);
			atomic_dec(&tbl->entries);
			goal--;
		}
		spin_unlock(&svc->sched_lock);
		if (goal <= 0)
			break;
	}
	tbl->rover = j;

out:
	mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
}

static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
{
	int i;
	struct ip_vs_lblcr_table *tbl;

	/*
	 * Allocate the ip_vs_lblcr_table for this service
	 */
	tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
	if (tbl == NULL)
		return -ENOMEM;

	svc->sched_data = tbl;
	IP_VS_DBG(6, "LBLCR hash table (memory=%zdbytes) allocated for "
		  "current service\n", sizeof(*tbl));

	/*
	 * Initialize the hash buckets
	 */
	for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
		INIT_HLIST_HEAD(&tbl->bucket[i]);
	}
	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
	tbl->rover = 0;
	tbl->counter = 1;
	tbl->dead = false;
	tbl->svc = svc;
	atomic_set(&tbl->entries, 0);

	/*
	 * Hook periodic timer for garbage collection
	 */
	timer_setup(&tbl->periodic_timer, ip_vs_lblcr_check_expire, 0);
	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);

	return 0;
}


static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_lblcr_table *tbl = svc->sched_data;

	/* remove periodic timer */
	del_timer_sync(&tbl->periodic_timer);

	/* got to clean up table entries here */
	ip_vs_lblcr_flush(svc);

	/* release the table itself */
	kfree_rcu(tbl, rcu_head);
	IP_VS_DBG(6, "LBLCR hash table (memory=%zdbytes) released\n",
		  sizeof(*tbl));
}


static inline struct ip_vs_dest *
__ip_vs_lblcr_schedule(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest, *least;
	int loh, doh;

	/*
	 * We use the following formula to estimate the load:
	 *		  (dest overhead) / dest->weight
	 *
	 * Remember -- no floats in kernel mode!!!
	 * The comparison of h1*w2 > h2*w1 is equivalent to that of
	 *		  h1/w1 > h2/w2
	 * if every weight is larger than zero.
	 *
	 * The server with weight=0 is quiesced and will not receive any
	 * new connection.
	 */
	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		if (atomic_read(&dest->weight) > 0) {
			least = dest;
			loh = ip_vs_dest_conn_overhead(least);
			goto nextstage;
		}
	}
	return NULL;

	/*
	 * Find the destination with the least load.
	 */
nextstage:
	list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		doh = ip_vs_dest_conn_overhead(dest);
		if ((__s64)loh * atomic_read(&dest->weight) >
		    (__s64)doh * atomic_read(&least->weight)) {
			least = dest;
			loh = doh;
		}
	}

	IP_VS_DBG_BUF(6, "LBLCR: server %s:%d "
		      "activeconns %d refcnt %d weight %d overhead %d\n",
		      IP_VS_DBG_ADDR(least->af, &least->addr),
		      ntohs(least->port),
		      atomic_read(&least->activeconns),
		      refcount_read(&least->refcnt),
		      atomic_read(&least->weight), loh);

	return least;
}


/*
 * If this destination server is overloaded and there is a less loaded
 * server, then return true.
 */
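/*
 * This is the replication trigger from the pseudo code above:
 * n.conns > n.weight while some node m has m.conns < m.weight/2.
 */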
static inline int
is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
		struct ip_vs_dest *d;

		list_for_each_entry_rcu(d, &svc->destinations, n_list) {
			if (atomic_read(&d->activeconns)*2
			    < atomic_read(&d->weight)) {
				return 1;
			}
		}
	}
	return 0;
}


/*
 * Locality-Based (weighted) Least-Connection with Replication scheduling
 */
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		     struct ip_vs_iphdr *iph)
{
	struct ip_vs_lblcr_table *tbl = svc->sched_data;
	struct ip_vs_dest *dest;
	struct ip_vs_lblcr_entry *en;

	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);

	/* First look in our cache */
	en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr);
	if (en) {
		en->lastuse = jiffies;

		/* Get the least loaded destination */
		dest = ip_vs_dest_set_min(&en->set);

		/* More than one destination + enough time passed by, cleanup */
		if (atomic_read(&en->set.size) > 1 &&
		    time_after(jiffies, en->set.lastmod +
			       sysctl_lblcr_expiration(svc))) {
			spin_lock_bh(&svc->sched_lock);
			if (atomic_read(&en->set.size) > 1) {
				struct ip_vs_dest *m;

				m = ip_vs_dest_set_max(&en->set);
				if (m)
					ip_vs_dest_set_erase(&en->set, m);
			}
			spin_unlock_bh(&svc->sched_lock);
		}

		/* If the destination is not overloaded, use it */
		if (dest && !is_overloaded(dest, svc))
			goto out;

		/* The cache entry is invalid, time to schedule */
		dest = __ip_vs_lblcr_schedule(svc);
		if (!dest) {
			ip_vs_scheduler_err(svc, "no destination available");
			return NULL;
		}

		/* Update our cache entry */
		spin_lock_bh(&svc->sched_lock);
		if (!tbl->dead)
			ip_vs_dest_set_insert(&en->set, dest, true);
		spin_unlock_bh(&svc->sched_lock);
		goto out;
	}

	/* No cache entry, time to schedule */
	dest = __ip_vs_lblcr_schedule(svc);
	if (!dest) {
		IP_VS_DBG(1, "no destination available\n");
		return NULL;
	}

	/* If we fail to create a cache entry, we'll just use the valid dest */
	spin_lock_bh(&svc->sched_lock);
	if (!tbl->dead)
		ip_vs_lblcr_new(tbl, &iph->daddr, svc->af, dest);
	spin_unlock_bh(&svc->sched_lock);

out:
	IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
		      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));

	return dest;
}


/*
 * IPVS LBLCR Scheduler structure
 */
static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
{
	.name =			"lblcr",
	.refcnt =		ATOMIC_INIT(0),
	.module =		THIS_MODULE,
	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
	.init_service =		ip_vs_lblcr_init_svc,
	.done_service =		ip_vs_lblcr_done_svc,
	.schedule =		ip_vs_lblcr_schedule,
};

/*
 * per netns init.
 */
#ifdef CONFIG_SYSCTL
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	if (!ipvs)
		return -ENOENT;

	if (!net_eq(net, &init_net)) {
		ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
						sizeof(vs_vars_table),
						GFP_KERNEL);
		if (ipvs->lblcr_ctl_table == NULL)
			return -ENOMEM;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			ipvs->lblcr_ctl_table[0].procname = NULL;
	} else
		ipvs->lblcr_ctl_table = vs_vars_table;
	ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
	ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;

	ipvs->lblcr_ctl_header =
		register_net_sysctl(net, "net/ipv4/vs", ipvs->lblcr_ctl_table);
	if (!ipvs->lblcr_ctl_header) {
		if (!net_eq(net, &init_net))
			kfree(ipvs->lblcr_ctl_table);
		return -ENOMEM;
	}

	return 0;
}

static void __net_exit __ip_vs_lblcr_exit(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	unregister_net_sysctl_table(ipvs->lblcr_ctl_header);

	if (!net_eq(net, &init_net))
		kfree(ipvs->lblcr_ctl_table);
}

#else

static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }

#endif

static struct pernet_operations ip_vs_lblcr_ops = {
	.init = __ip_vs_lblcr_init,
	.exit = __ip_vs_lblcr_exit,
};

static int __init ip_vs_lblcr_init(void)
{
	int ret;

	ret = register_pernet_subsys(&ip_vs_lblcr_ops);
	if (ret)
		return ret;

	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
	if (ret)
		unregister_pernet_subsys(&ip_vs_lblcr_ops);
	return ret;
}

static void __exit ip_vs_lblcr_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
	unregister_pernet_subsys(&ip_vs_lblcr_ops);
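	/* Wait for in-flight call_rcu() callbacks
	 * (ip_vs_lblcr_elem_rcu_free) to finish before the module text
	 * goes away.
	 */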
	rcu_barrier();
}


module_init(ip_vs_lblcr_init);
module_exit(ip_vs_lblcr_cleanup);
MODULE_LICENSE("GPL");