1 /* 2 * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team 3 * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/types.h> 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/errno.h> 16 #include <linux/netdevice.h> 17 #include <linux/filter.h> 18 #include <linux/if_team.h> 19 20 struct lb_priv; 21 22 typedef struct team_port *lb_select_tx_port_func_t(struct team *, 23 struct lb_priv *, 24 struct sk_buff *, 25 unsigned char); 26 27 #define LB_TX_HASHTABLE_SIZE 256 /* hash is a char */ 28 29 struct lb_stats { 30 u64 tx_bytes; 31 }; 32 33 struct lb_pcpu_stats { 34 struct lb_stats hash_stats[LB_TX_HASHTABLE_SIZE]; 35 struct u64_stats_sync syncp; 36 }; 37 38 struct lb_stats_info { 39 struct lb_stats stats; 40 struct lb_stats last_stats; 41 struct team_option_inst_info *opt_inst_info; 42 }; 43 44 struct lb_port_mapping { 45 struct team_port __rcu *port; 46 struct team_option_inst_info *opt_inst_info; 47 }; 48 49 struct lb_priv_ex { 50 struct team *team; 51 struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE]; 52 struct sock_fprog_kern *orig_fprog; 53 struct { 54 unsigned int refresh_interval; /* in tenths of second */ 55 struct delayed_work refresh_dw; 56 struct lb_stats_info info[LB_TX_HASHTABLE_SIZE]; 57 } stats; 58 }; 59 60 struct lb_priv { 61 struct sk_filter __rcu *fp; 62 lb_select_tx_port_func_t __rcu *select_tx_port_func; 63 struct lb_pcpu_stats __percpu *pcpu_stats; 64 struct lb_priv_ex *ex; /* priv extension */ 65 }; 66 67 static struct lb_priv *get_lb_priv(struct team *team) 68 { 69 return (struct lb_priv *) &team->mode_priv; 70 } 71 72 struct lb_port_priv { 73 struct lb_stats __percpu *pcpu_stats; 74 struct lb_stats_info stats_info; 75 }; 76 77 static struct lb_port_priv *get_lb_port_priv(struct team_port *port) 78 { 79 return (struct lb_port_priv *) &port->mode_priv; 80 } 81 82 #define LB_HTPM_PORT_BY_HASH(lp_priv, hash) \ 83 (lb_priv)->ex->tx_hash_to_port_mapping[hash].port 84 85 #define LB_HTPM_OPT_INST_INFO_BY_HASH(lp_priv, hash) \ 86 (lb_priv)->ex->tx_hash_to_port_mapping[hash].opt_inst_info 87 88 static void lb_tx_hash_to_port_mapping_null_port(struct team *team, 89 struct team_port *port) 90 { 91 struct lb_priv *lb_priv = get_lb_priv(team); 92 bool changed = false; 93 int i; 94 95 for (i = 0; i < LB_TX_HASHTABLE_SIZE; i++) { 96 struct lb_port_mapping *pm; 97 98 pm = &lb_priv->ex->tx_hash_to_port_mapping[i]; 99 if (rcu_access_pointer(pm->port) == port) { 100 RCU_INIT_POINTER(pm->port, NULL); 101 team_option_inst_set_change(pm->opt_inst_info); 102 changed = true; 103 } 104 } 105 if (changed) 106 team_options_change_check(team); 107 } 108 109 /* Basic tx selection based solely by hash */ 110 static struct team_port *lb_hash_select_tx_port(struct team *team, 111 struct lb_priv *lb_priv, 112 struct sk_buff *skb, 113 unsigned char hash) 114 { 115 int port_index = team_num_to_port_index(team, hash); 116 117 return team_get_port_by_index_rcu(team, port_index); 118 } 119 120 /* Hash to port mapping select tx port */ 121 static struct team_port *lb_htpm_select_tx_port(struct team *team, 122 struct lb_priv *lb_priv, 123 struct sk_buff *skb, 124 unsigned char hash) 125 { 126 return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash)); 127 } 128 129 struct lb_select_tx_port { 130 char *name; 131 lb_select_tx_port_func_t *func; 132 }; 133 134 static const struct lb_select_tx_port lb_select_tx_port_list[] = { 135 { 136 .name = "hash", 137 .func = lb_hash_select_tx_port, 138 }, 139 { 140 .name = "hash_to_port_mapping", 141 .func = lb_htpm_select_tx_port, 142 }, 143 }; 144 #define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list) 145 146 static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t *func) 147 { 148 int i; 149 150 for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) { 151 const struct lb_select_tx_port *item; 152 153 item = &lb_select_tx_port_list[i]; 154 if (item->func == func) 155 return item->name; 156 } 157 return NULL; 158 } 159 160 static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name) 161 { 162 int i; 163 164 for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) { 165 const struct lb_select_tx_port *item; 166 167 item = &lb_select_tx_port_list[i]; 168 if (!strcmp(item->name, name)) 169 return item->func; 170 } 171 return NULL; 172 } 173 174 static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv, 175 struct sk_buff *skb) 176 { 177 struct sk_filter *fp; 178 uint32_t lhash; 179 unsigned char *c; 180 181 fp = rcu_dereference_bh(lb_priv->fp); 182 if (unlikely(!fp)) 183 return 0; 184 lhash = SK_RUN_FILTER(fp, skb); 185 c = (char *) &lhash; 186 return c[0] ^ c[1] ^ c[2] ^ c[3]; 187 } 188 189 static void lb_update_tx_stats(unsigned int tx_bytes, struct lb_priv *lb_priv, 190 struct lb_port_priv *lb_port_priv, 191 unsigned char hash) 192 { 193 struct lb_pcpu_stats *pcpu_stats; 194 struct lb_stats *port_stats; 195 struct lb_stats *hash_stats; 196 197 pcpu_stats = this_cpu_ptr(lb_priv->pcpu_stats); 198 port_stats = this_cpu_ptr(lb_port_priv->pcpu_stats); 199 hash_stats = &pcpu_stats->hash_stats[hash]; 200 u64_stats_update_begin(&pcpu_stats->syncp); 201 port_stats->tx_bytes += tx_bytes; 202 hash_stats->tx_bytes += tx_bytes; 203 u64_stats_update_end(&pcpu_stats->syncp); 204 } 205 206 static bool lb_transmit(struct team *team, struct sk_buff *skb) 207 { 208 struct lb_priv *lb_priv = get_lb_priv(team); 209 lb_select_tx_port_func_t *select_tx_port_func; 210 struct team_port *port; 211 unsigned char hash; 212 unsigned int tx_bytes = skb->len; 213 214 hash = lb_get_skb_hash(lb_priv, skb); 215 select_tx_port_func = rcu_dereference_bh(lb_priv->select_tx_port_func); 216 port = select_tx_port_func(team, lb_priv, skb, hash); 217 if (unlikely(!port)) 218 goto drop; 219 if (team_dev_queue_xmit(team, port, skb)) 220 return false; 221 lb_update_tx_stats(tx_bytes, lb_priv, get_lb_port_priv(port), hash); 222 return true; 223 224 drop: 225 dev_kfree_skb_any(skb); 226 return false; 227 } 228 229 static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx) 230 { 231 struct lb_priv *lb_priv = get_lb_priv(team); 232 233 if (!lb_priv->ex->orig_fprog) { 234 ctx->data.bin_val.len = 0; 235 ctx->data.bin_val.ptr = NULL; 236 return 0; 237 } 238 ctx->data.bin_val.len = lb_priv->ex->orig_fprog->len * 239 sizeof(struct sock_filter); 240 ctx->data.bin_val.ptr = lb_priv->ex->orig_fprog->filter; 241 return 0; 242 } 243 244 static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len, 245 const void *data) 246 { 247 struct sock_fprog_kern *fprog; 248 struct sock_filter *filter = (struct sock_filter *) data; 249 250 if (data_len % sizeof(struct sock_filter)) 251 return -EINVAL; 252 fprog = kmalloc(sizeof(*fprog), GFP_KERNEL); 253 if (!fprog) 254 return -ENOMEM; 255 fprog->filter = kmemdup(filter, data_len, GFP_KERNEL); 256 if (!fprog->filter) { 257 kfree(fprog); 258 return -ENOMEM; 259 } 260 fprog->len = data_len / sizeof(struct sock_filter); 261 *pfprog = fprog; 262 return 0; 263 } 264 265 static void __fprog_destroy(struct sock_fprog_kern *fprog) 266 { 267 kfree(fprog->filter); 268 kfree(fprog); 269 } 270 271 static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) 272 { 273 struct lb_priv *lb_priv = get_lb_priv(team); 274 struct sk_filter *fp = NULL; 275 struct sk_filter *orig_fp; 276 struct sock_fprog_kern *fprog = NULL; 277 int err; 278 279 if (ctx->data.bin_val.len) { 280 err = __fprog_create(&fprog, ctx->data.bin_val.len, 281 ctx->data.bin_val.ptr); 282 if (err) 283 return err; 284 err = sk_unattached_filter_create(&fp, fprog); 285 if (err) { 286 __fprog_destroy(fprog); 287 return err; 288 } 289 } 290 291 if (lb_priv->ex->orig_fprog) { 292 /* Clear old filter data */ 293 __fprog_destroy(lb_priv->ex->orig_fprog); 294 orig_fp = rcu_dereference_protected(lb_priv->fp, 295 lockdep_is_held(&team->lock)); 296 sk_unattached_filter_destroy(orig_fp); 297 } 298 299 rcu_assign_pointer(lb_priv->fp, fp); 300 lb_priv->ex->orig_fprog = fprog; 301 return 0; 302 } 303 304 static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx) 305 { 306 struct lb_priv *lb_priv = get_lb_priv(team); 307 lb_select_tx_port_func_t *func; 308 char *name; 309 310 func = rcu_dereference_protected(lb_priv->select_tx_port_func, 311 lockdep_is_held(&team->lock)); 312 name = lb_select_tx_port_get_name(func); 313 BUG_ON(!name); 314 ctx->data.str_val = name; 315 return 0; 316 } 317 318 static int lb_tx_method_set(struct team *team, struct team_gsetter_ctx *ctx) 319 { 320 struct lb_priv *lb_priv = get_lb_priv(team); 321 lb_select_tx_port_func_t *func; 322 323 func = lb_select_tx_port_get_func(ctx->data.str_val); 324 if (!func) 325 return -EINVAL; 326 rcu_assign_pointer(lb_priv->select_tx_port_func, func); 327 return 0; 328 } 329 330 static int lb_tx_hash_to_port_mapping_init(struct team *team, 331 struct team_option_inst_info *info) 332 { 333 struct lb_priv *lb_priv = get_lb_priv(team); 334 unsigned char hash = info->array_index; 335 336 LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) = info; 337 return 0; 338 } 339 340 static int lb_tx_hash_to_port_mapping_get(struct team *team, 341 struct team_gsetter_ctx *ctx) 342 { 343 struct lb_priv *lb_priv = get_lb_priv(team); 344 struct team_port *port; 345 unsigned char hash = ctx->info->array_index; 346 347 port = LB_HTPM_PORT_BY_HASH(lb_priv, hash); 348 ctx->data.u32_val = port ? port->dev->ifindex : 0; 349 return 0; 350 } 351 352 static int lb_tx_hash_to_port_mapping_set(struct team *team, 353 struct team_gsetter_ctx *ctx) 354 { 355 struct lb_priv *lb_priv = get_lb_priv(team); 356 struct team_port *port; 357 unsigned char hash = ctx->info->array_index; 358 359 list_for_each_entry(port, &team->port_list, list) { 360 if (ctx->data.u32_val == port->dev->ifindex && 361 team_port_enabled(port)) { 362 rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash), 363 port); 364 return 0; 365 } 366 } 367 return -ENODEV; 368 } 369 370 static int lb_hash_stats_init(struct team *team, 371 struct team_option_inst_info *info) 372 { 373 struct lb_priv *lb_priv = get_lb_priv(team); 374 unsigned char hash = info->array_index; 375 376 lb_priv->ex->stats.info[hash].opt_inst_info = info; 377 return 0; 378 } 379 380 static int lb_hash_stats_get(struct team *team, struct team_gsetter_ctx *ctx) 381 { 382 struct lb_priv *lb_priv = get_lb_priv(team); 383 unsigned char hash = ctx->info->array_index; 384 385 ctx->data.bin_val.ptr = &lb_priv->ex->stats.info[hash].stats; 386 ctx->data.bin_val.len = sizeof(struct lb_stats); 387 return 0; 388 } 389 390 static int lb_port_stats_init(struct team *team, 391 struct team_option_inst_info *info) 392 { 393 struct team_port *port = info->port; 394 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 395 396 lb_port_priv->stats_info.opt_inst_info = info; 397 return 0; 398 } 399 400 static int lb_port_stats_get(struct team *team, struct team_gsetter_ctx *ctx) 401 { 402 struct team_port *port = ctx->info->port; 403 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 404 405 ctx->data.bin_val.ptr = &lb_port_priv->stats_info.stats; 406 ctx->data.bin_val.len = sizeof(struct lb_stats); 407 return 0; 408 } 409 410 static void __lb_stats_info_refresh_prepare(struct lb_stats_info *s_info) 411 { 412 memcpy(&s_info->last_stats, &s_info->stats, sizeof(struct lb_stats)); 413 memset(&s_info->stats, 0, sizeof(struct lb_stats)); 414 } 415 416 static bool __lb_stats_info_refresh_check(struct lb_stats_info *s_info, 417 struct team *team) 418 { 419 if (memcmp(&s_info->last_stats, &s_info->stats, 420 sizeof(struct lb_stats))) { 421 team_option_inst_set_change(s_info->opt_inst_info); 422 return true; 423 } 424 return false; 425 } 426 427 static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats, 428 struct lb_stats *cpu_stats, 429 struct u64_stats_sync *syncp) 430 { 431 unsigned int start; 432 struct lb_stats tmp; 433 434 do { 435 start = u64_stats_fetch_begin_irq(syncp); 436 tmp.tx_bytes = cpu_stats->tx_bytes; 437 } while (u64_stats_fetch_retry_irq(syncp, start)); 438 acc_stats->tx_bytes += tmp.tx_bytes; 439 } 440 441 static void lb_stats_refresh(struct work_struct *work) 442 { 443 struct team *team; 444 struct lb_priv *lb_priv; 445 struct lb_priv_ex *lb_priv_ex; 446 struct lb_pcpu_stats *pcpu_stats; 447 struct lb_stats *stats; 448 struct lb_stats_info *s_info; 449 struct team_port *port; 450 bool changed = false; 451 int i; 452 int j; 453 454 lb_priv_ex = container_of(work, struct lb_priv_ex, 455 stats.refresh_dw.work); 456 457 team = lb_priv_ex->team; 458 lb_priv = get_lb_priv(team); 459 460 if (!mutex_trylock(&team->lock)) { 461 schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0); 462 return; 463 } 464 465 for (j = 0; j < LB_TX_HASHTABLE_SIZE; j++) { 466 s_info = &lb_priv->ex->stats.info[j]; 467 __lb_stats_info_refresh_prepare(s_info); 468 for_each_possible_cpu(i) { 469 pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); 470 stats = &pcpu_stats->hash_stats[j]; 471 __lb_one_cpu_stats_add(&s_info->stats, stats, 472 &pcpu_stats->syncp); 473 } 474 changed |= __lb_stats_info_refresh_check(s_info, team); 475 } 476 477 list_for_each_entry(port, &team->port_list, list) { 478 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 479 480 s_info = &lb_port_priv->stats_info; 481 __lb_stats_info_refresh_prepare(s_info); 482 for_each_possible_cpu(i) { 483 pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); 484 stats = per_cpu_ptr(lb_port_priv->pcpu_stats, i); 485 __lb_one_cpu_stats_add(&s_info->stats, stats, 486 &pcpu_stats->syncp); 487 } 488 changed |= __lb_stats_info_refresh_check(s_info, team); 489 } 490 491 if (changed) 492 team_options_change_check(team); 493 494 schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 495 (lb_priv_ex->stats.refresh_interval * HZ) / 10); 496 497 mutex_unlock(&team->lock); 498 } 499 500 static int lb_stats_refresh_interval_get(struct team *team, 501 struct team_gsetter_ctx *ctx) 502 { 503 struct lb_priv *lb_priv = get_lb_priv(team); 504 505 ctx->data.u32_val = lb_priv->ex->stats.refresh_interval; 506 return 0; 507 } 508 509 static int lb_stats_refresh_interval_set(struct team *team, 510 struct team_gsetter_ctx *ctx) 511 { 512 struct lb_priv *lb_priv = get_lb_priv(team); 513 unsigned int interval; 514 515 interval = ctx->data.u32_val; 516 if (lb_priv->ex->stats.refresh_interval == interval) 517 return 0; 518 lb_priv->ex->stats.refresh_interval = interval; 519 if (interval) 520 schedule_delayed_work(&lb_priv->ex->stats.refresh_dw, 0); 521 else 522 cancel_delayed_work(&lb_priv->ex->stats.refresh_dw); 523 return 0; 524 } 525 526 static const struct team_option lb_options[] = { 527 { 528 .name = "bpf_hash_func", 529 .type = TEAM_OPTION_TYPE_BINARY, 530 .getter = lb_bpf_func_get, 531 .setter = lb_bpf_func_set, 532 }, 533 { 534 .name = "lb_tx_method", 535 .type = TEAM_OPTION_TYPE_STRING, 536 .getter = lb_tx_method_get, 537 .setter = lb_tx_method_set, 538 }, 539 { 540 .name = "lb_tx_hash_to_port_mapping", 541 .array_size = LB_TX_HASHTABLE_SIZE, 542 .type = TEAM_OPTION_TYPE_U32, 543 .init = lb_tx_hash_to_port_mapping_init, 544 .getter = lb_tx_hash_to_port_mapping_get, 545 .setter = lb_tx_hash_to_port_mapping_set, 546 }, 547 { 548 .name = "lb_hash_stats", 549 .array_size = LB_TX_HASHTABLE_SIZE, 550 .type = TEAM_OPTION_TYPE_BINARY, 551 .init = lb_hash_stats_init, 552 .getter = lb_hash_stats_get, 553 }, 554 { 555 .name = "lb_port_stats", 556 .per_port = true, 557 .type = TEAM_OPTION_TYPE_BINARY, 558 .init = lb_port_stats_init, 559 .getter = lb_port_stats_get, 560 }, 561 { 562 .name = "lb_stats_refresh_interval", 563 .type = TEAM_OPTION_TYPE_U32, 564 .getter = lb_stats_refresh_interval_get, 565 .setter = lb_stats_refresh_interval_set, 566 }, 567 }; 568 569 static int lb_init(struct team *team) 570 { 571 struct lb_priv *lb_priv = get_lb_priv(team); 572 lb_select_tx_port_func_t *func; 573 int i, err; 574 575 /* set default tx port selector */ 576 func = lb_select_tx_port_get_func("hash"); 577 BUG_ON(!func); 578 rcu_assign_pointer(lb_priv->select_tx_port_func, func); 579 580 lb_priv->ex = kzalloc(sizeof(*lb_priv->ex), GFP_KERNEL); 581 if (!lb_priv->ex) 582 return -ENOMEM; 583 lb_priv->ex->team = team; 584 585 lb_priv->pcpu_stats = alloc_percpu(struct lb_pcpu_stats); 586 if (!lb_priv->pcpu_stats) { 587 err = -ENOMEM; 588 goto err_alloc_pcpu_stats; 589 } 590 591 for_each_possible_cpu(i) { 592 struct lb_pcpu_stats *team_lb_stats; 593 team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); 594 u64_stats_init(&team_lb_stats->syncp); 595 } 596 597 598 INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh); 599 600 err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options)); 601 if (err) 602 goto err_options_register; 603 return 0; 604 605 err_options_register: 606 free_percpu(lb_priv->pcpu_stats); 607 err_alloc_pcpu_stats: 608 kfree(lb_priv->ex); 609 return err; 610 } 611 612 static void lb_exit(struct team *team) 613 { 614 struct lb_priv *lb_priv = get_lb_priv(team); 615 616 team_options_unregister(team, lb_options, 617 ARRAY_SIZE(lb_options)); 618 cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw); 619 free_percpu(lb_priv->pcpu_stats); 620 kfree(lb_priv->ex); 621 } 622 623 static int lb_port_enter(struct team *team, struct team_port *port) 624 { 625 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 626 627 lb_port_priv->pcpu_stats = alloc_percpu(struct lb_stats); 628 if (!lb_port_priv->pcpu_stats) 629 return -ENOMEM; 630 return 0; 631 } 632 633 static void lb_port_leave(struct team *team, struct team_port *port) 634 { 635 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 636 637 free_percpu(lb_port_priv->pcpu_stats); 638 } 639 640 static void lb_port_disabled(struct team *team, struct team_port *port) 641 { 642 lb_tx_hash_to_port_mapping_null_port(team, port); 643 } 644 645 static const struct team_mode_ops lb_mode_ops = { 646 .init = lb_init, 647 .exit = lb_exit, 648 .port_enter = lb_port_enter, 649 .port_leave = lb_port_leave, 650 .port_disabled = lb_port_disabled, 651 .transmit = lb_transmit, 652 }; 653 654 static const struct team_mode lb_mode = { 655 .kind = "loadbalance", 656 .owner = THIS_MODULE, 657 .priv_size = sizeof(struct lb_priv), 658 .port_priv_size = sizeof(struct lb_port_priv), 659 .ops = &lb_mode_ops, 660 }; 661 662 static int __init lb_init_module(void) 663 { 664 return team_mode_register(&lb_mode); 665 } 666 667 static void __exit lb_cleanup_module(void) 668 { 669 team_mode_unregister(&lb_mode); 670 } 671 672 module_init(lb_init_module); 673 module_exit(lb_cleanup_module); 674 675 MODULE_LICENSE("GPL v2"); 676 MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); 677 MODULE_DESCRIPTION("Load-balancing mode for team"); 678 MODULE_ALIAS("team-mode-loadbalance"); 679