1 /* 2 * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team 3 * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/types.h> 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/errno.h> 16 #include <linux/netdevice.h> 17 #include <linux/filter.h> 18 #include <linux/if_team.h> 19 20 struct lb_priv; 21 22 typedef struct team_port *lb_select_tx_port_func_t(struct team *, 23 struct lb_priv *, 24 struct sk_buff *, 25 unsigned char); 26 27 #define LB_TX_HASHTABLE_SIZE 256 /* hash is a char */ 28 29 struct lb_stats { 30 u64 tx_bytes; 31 }; 32 33 struct lb_pcpu_stats { 34 struct lb_stats hash_stats[LB_TX_HASHTABLE_SIZE]; 35 struct u64_stats_sync syncp; 36 }; 37 38 struct lb_stats_info { 39 struct lb_stats stats; 40 struct lb_stats last_stats; 41 struct team_option_inst_info *opt_inst_info; 42 }; 43 44 struct lb_port_mapping { 45 struct team_port __rcu *port; 46 struct team_option_inst_info *opt_inst_info; 47 }; 48 49 struct lb_priv_ex { 50 struct team *team; 51 struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE]; 52 struct sock_fprog_kern *orig_fprog; 53 struct { 54 unsigned int refresh_interval; /* in tenths of second */ 55 struct delayed_work refresh_dw; 56 struct lb_stats_info info[LB_TX_HASHTABLE_SIZE]; 57 } stats; 58 }; 59 60 struct lb_priv { 61 struct bpf_prog __rcu *fp; 62 lb_select_tx_port_func_t __rcu *select_tx_port_func; 63 struct lb_pcpu_stats __percpu *pcpu_stats; 64 struct lb_priv_ex *ex; /* priv extension */ 65 }; 66 67 static struct lb_priv *get_lb_priv(struct team *team) 68 { 69 return (struct lb_priv *) &team->mode_priv; 70 } 71 72 struct lb_port_priv { 73 struct lb_stats __percpu *pcpu_stats; 74 struct lb_stats_info stats_info; 75 }; 76 77 static struct lb_port_priv *get_lb_port_priv(struct team_port *port) 78 { 79 return (struct lb_port_priv *) &port->mode_priv; 80 } 81 82 #define LB_HTPM_PORT_BY_HASH(lp_priv, hash) \ 83 (lb_priv)->ex->tx_hash_to_port_mapping[hash].port 84 85 #define LB_HTPM_OPT_INST_INFO_BY_HASH(lp_priv, hash) \ 86 (lb_priv)->ex->tx_hash_to_port_mapping[hash].opt_inst_info 87 88 static void lb_tx_hash_to_port_mapping_null_port(struct team *team, 89 struct team_port *port) 90 { 91 struct lb_priv *lb_priv = get_lb_priv(team); 92 bool changed = false; 93 int i; 94 95 for (i = 0; i < LB_TX_HASHTABLE_SIZE; i++) { 96 struct lb_port_mapping *pm; 97 98 pm = &lb_priv->ex->tx_hash_to_port_mapping[i]; 99 if (rcu_access_pointer(pm->port) == port) { 100 RCU_INIT_POINTER(pm->port, NULL); 101 team_option_inst_set_change(pm->opt_inst_info); 102 changed = true; 103 } 104 } 105 if (changed) 106 team_options_change_check(team); 107 } 108 109 /* Basic tx selection based solely by hash */ 110 static struct team_port *lb_hash_select_tx_port(struct team *team, 111 struct lb_priv *lb_priv, 112 struct sk_buff *skb, 113 unsigned char hash) 114 { 115 int port_index = team_num_to_port_index(team, hash); 116 117 return team_get_port_by_index_rcu(team, port_index); 118 } 119 120 /* Hash to port mapping select tx port */ 121 static struct team_port *lb_htpm_select_tx_port(struct team *team, 122 struct lb_priv *lb_priv, 123 struct sk_buff *skb, 124 unsigned char hash) 125 { 126 return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash)); 127 } 128 129 struct lb_select_tx_port { 130 char *name; 131 lb_select_tx_port_func_t *func; 132 }; 133 134 static const struct lb_select_tx_port lb_select_tx_port_list[] = { 135 { 136 .name = "hash", 137 .func = lb_hash_select_tx_port, 138 }, 139 { 140 .name = "hash_to_port_mapping", 141 .func = lb_htpm_select_tx_port, 142 }, 143 }; 144 #define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list) 145 146 static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t *func) 147 { 148 int i; 149 150 for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) { 151 const struct lb_select_tx_port *item; 152 153 item = &lb_select_tx_port_list[i]; 154 if (item->func == func) 155 return item->name; 156 } 157 return NULL; 158 } 159 160 static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name) 161 { 162 int i; 163 164 for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) { 165 const struct lb_select_tx_port *item; 166 167 item = &lb_select_tx_port_list[i]; 168 if (!strcmp(item->name, name)) 169 return item->func; 170 } 171 return NULL; 172 } 173 174 static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv, 175 struct sk_buff *skb) 176 { 177 struct bpf_prog *fp; 178 uint32_t lhash; 179 unsigned char *c; 180 181 fp = rcu_dereference_bh(lb_priv->fp); 182 if (unlikely(!fp)) 183 return 0; 184 lhash = BPF_PROG_RUN(fp, skb); 185 c = (char *) &lhash; 186 return c[0] ^ c[1] ^ c[2] ^ c[3]; 187 } 188 189 static void lb_update_tx_stats(unsigned int tx_bytes, struct lb_priv *lb_priv, 190 struct lb_port_priv *lb_port_priv, 191 unsigned char hash) 192 { 193 struct lb_pcpu_stats *pcpu_stats; 194 struct lb_stats *port_stats; 195 struct lb_stats *hash_stats; 196 197 pcpu_stats = this_cpu_ptr(lb_priv->pcpu_stats); 198 port_stats = this_cpu_ptr(lb_port_priv->pcpu_stats); 199 hash_stats = &pcpu_stats->hash_stats[hash]; 200 u64_stats_update_begin(&pcpu_stats->syncp); 201 port_stats->tx_bytes += tx_bytes; 202 hash_stats->tx_bytes += tx_bytes; 203 u64_stats_update_end(&pcpu_stats->syncp); 204 } 205 206 static bool lb_transmit(struct team *team, struct sk_buff *skb) 207 { 208 struct lb_priv *lb_priv = get_lb_priv(team); 209 lb_select_tx_port_func_t *select_tx_port_func; 210 struct team_port *port; 211 unsigned char hash; 212 unsigned int tx_bytes = skb->len; 213 214 hash = lb_get_skb_hash(lb_priv, skb); 215 select_tx_port_func = rcu_dereference_bh(lb_priv->select_tx_port_func); 216 port = select_tx_port_func(team, lb_priv, skb, hash); 217 if (unlikely(!port)) 218 goto drop; 219 if (team_dev_queue_xmit(team, port, skb)) 220 return false; 221 lb_update_tx_stats(tx_bytes, lb_priv, get_lb_port_priv(port), hash); 222 return true; 223 224 drop: 225 dev_kfree_skb_any(skb); 226 return false; 227 } 228 229 static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx) 230 { 231 struct lb_priv *lb_priv = get_lb_priv(team); 232 233 if (!lb_priv->ex->orig_fprog) { 234 ctx->data.bin_val.len = 0; 235 ctx->data.bin_val.ptr = NULL; 236 return 0; 237 } 238 ctx->data.bin_val.len = lb_priv->ex->orig_fprog->len * 239 sizeof(struct sock_filter); 240 ctx->data.bin_val.ptr = lb_priv->ex->orig_fprog->filter; 241 return 0; 242 } 243 244 static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len, 245 const void *data) 246 { 247 struct sock_fprog_kern *fprog; 248 struct sock_filter *filter = (struct sock_filter *) data; 249 250 if (data_len % sizeof(struct sock_filter)) 251 return -EINVAL; 252 fprog = kmalloc(sizeof(*fprog), GFP_KERNEL); 253 if (!fprog) 254 return -ENOMEM; 255 fprog->filter = kmemdup(filter, data_len, GFP_KERNEL); 256 if (!fprog->filter) { 257 kfree(fprog); 258 return -ENOMEM; 259 } 260 fprog->len = data_len / sizeof(struct sock_filter); 261 *pfprog = fprog; 262 return 0; 263 } 264 265 static void __fprog_destroy(struct sock_fprog_kern *fprog) 266 { 267 kfree(fprog->filter); 268 kfree(fprog); 269 } 270 271 static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) 272 { 273 struct lb_priv *lb_priv = get_lb_priv(team); 274 struct bpf_prog *fp = NULL; 275 struct bpf_prog *orig_fp = NULL; 276 struct sock_fprog_kern *fprog = NULL; 277 int err; 278 279 if (ctx->data.bin_val.len) { 280 err = __fprog_create(&fprog, ctx->data.bin_val.len, 281 ctx->data.bin_val.ptr); 282 if (err) 283 return err; 284 err = bpf_prog_create(&fp, fprog); 285 if (err) { 286 __fprog_destroy(fprog); 287 return err; 288 } 289 } 290 291 if (lb_priv->ex->orig_fprog) { 292 /* Clear old filter data */ 293 __fprog_destroy(lb_priv->ex->orig_fprog); 294 orig_fp = rcu_dereference_protected(lb_priv->fp, 295 lockdep_is_held(&team->lock)); 296 } 297 298 rcu_assign_pointer(lb_priv->fp, fp); 299 lb_priv->ex->orig_fprog = fprog; 300 301 if (orig_fp) { 302 synchronize_rcu(); 303 bpf_prog_destroy(orig_fp); 304 } 305 return 0; 306 } 307 308 static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx) 309 { 310 struct lb_priv *lb_priv = get_lb_priv(team); 311 lb_select_tx_port_func_t *func; 312 char *name; 313 314 func = rcu_dereference_protected(lb_priv->select_tx_port_func, 315 lockdep_is_held(&team->lock)); 316 name = lb_select_tx_port_get_name(func); 317 BUG_ON(!name); 318 ctx->data.str_val = name; 319 return 0; 320 } 321 322 static int lb_tx_method_set(struct team *team, struct team_gsetter_ctx *ctx) 323 { 324 struct lb_priv *lb_priv = get_lb_priv(team); 325 lb_select_tx_port_func_t *func; 326 327 func = lb_select_tx_port_get_func(ctx->data.str_val); 328 if (!func) 329 return -EINVAL; 330 rcu_assign_pointer(lb_priv->select_tx_port_func, func); 331 return 0; 332 } 333 334 static int lb_tx_hash_to_port_mapping_init(struct team *team, 335 struct team_option_inst_info *info) 336 { 337 struct lb_priv *lb_priv = get_lb_priv(team); 338 unsigned char hash = info->array_index; 339 340 LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) = info; 341 return 0; 342 } 343 344 static int lb_tx_hash_to_port_mapping_get(struct team *team, 345 struct team_gsetter_ctx *ctx) 346 { 347 struct lb_priv *lb_priv = get_lb_priv(team); 348 struct team_port *port; 349 unsigned char hash = ctx->info->array_index; 350 351 port = LB_HTPM_PORT_BY_HASH(lb_priv, hash); 352 ctx->data.u32_val = port ? port->dev->ifindex : 0; 353 return 0; 354 } 355 356 static int lb_tx_hash_to_port_mapping_set(struct team *team, 357 struct team_gsetter_ctx *ctx) 358 { 359 struct lb_priv *lb_priv = get_lb_priv(team); 360 struct team_port *port; 361 unsigned char hash = ctx->info->array_index; 362 363 list_for_each_entry(port, &team->port_list, list) { 364 if (ctx->data.u32_val == port->dev->ifindex && 365 team_port_enabled(port)) { 366 rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash), 367 port); 368 return 0; 369 } 370 } 371 return -ENODEV; 372 } 373 374 static int lb_hash_stats_init(struct team *team, 375 struct team_option_inst_info *info) 376 { 377 struct lb_priv *lb_priv = get_lb_priv(team); 378 unsigned char hash = info->array_index; 379 380 lb_priv->ex->stats.info[hash].opt_inst_info = info; 381 return 0; 382 } 383 384 static int lb_hash_stats_get(struct team *team, struct team_gsetter_ctx *ctx) 385 { 386 struct lb_priv *lb_priv = get_lb_priv(team); 387 unsigned char hash = ctx->info->array_index; 388 389 ctx->data.bin_val.ptr = &lb_priv->ex->stats.info[hash].stats; 390 ctx->data.bin_val.len = sizeof(struct lb_stats); 391 return 0; 392 } 393 394 static int lb_port_stats_init(struct team *team, 395 struct team_option_inst_info *info) 396 { 397 struct team_port *port = info->port; 398 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 399 400 lb_port_priv->stats_info.opt_inst_info = info; 401 return 0; 402 } 403 404 static int lb_port_stats_get(struct team *team, struct team_gsetter_ctx *ctx) 405 { 406 struct team_port *port = ctx->info->port; 407 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 408 409 ctx->data.bin_val.ptr = &lb_port_priv->stats_info.stats; 410 ctx->data.bin_val.len = sizeof(struct lb_stats); 411 return 0; 412 } 413 414 static void __lb_stats_info_refresh_prepare(struct lb_stats_info *s_info) 415 { 416 memcpy(&s_info->last_stats, &s_info->stats, sizeof(struct lb_stats)); 417 memset(&s_info->stats, 0, sizeof(struct lb_stats)); 418 } 419 420 static bool __lb_stats_info_refresh_check(struct lb_stats_info *s_info, 421 struct team *team) 422 { 423 if (memcmp(&s_info->last_stats, &s_info->stats, 424 sizeof(struct lb_stats))) { 425 team_option_inst_set_change(s_info->opt_inst_info); 426 return true; 427 } 428 return false; 429 } 430 431 static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats, 432 struct lb_stats *cpu_stats, 433 struct u64_stats_sync *syncp) 434 { 435 unsigned int start; 436 struct lb_stats tmp; 437 438 do { 439 start = u64_stats_fetch_begin_irq(syncp); 440 tmp.tx_bytes = cpu_stats->tx_bytes; 441 } while (u64_stats_fetch_retry_irq(syncp, start)); 442 acc_stats->tx_bytes += tmp.tx_bytes; 443 } 444 445 static void lb_stats_refresh(struct work_struct *work) 446 { 447 struct team *team; 448 struct lb_priv *lb_priv; 449 struct lb_priv_ex *lb_priv_ex; 450 struct lb_pcpu_stats *pcpu_stats; 451 struct lb_stats *stats; 452 struct lb_stats_info *s_info; 453 struct team_port *port; 454 bool changed = false; 455 int i; 456 int j; 457 458 lb_priv_ex = container_of(work, struct lb_priv_ex, 459 stats.refresh_dw.work); 460 461 team = lb_priv_ex->team; 462 lb_priv = get_lb_priv(team); 463 464 if (!mutex_trylock(&team->lock)) { 465 schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0); 466 return; 467 } 468 469 for (j = 0; j < LB_TX_HASHTABLE_SIZE; j++) { 470 s_info = &lb_priv->ex->stats.info[j]; 471 __lb_stats_info_refresh_prepare(s_info); 472 for_each_possible_cpu(i) { 473 pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); 474 stats = &pcpu_stats->hash_stats[j]; 475 __lb_one_cpu_stats_add(&s_info->stats, stats, 476 &pcpu_stats->syncp); 477 } 478 changed |= __lb_stats_info_refresh_check(s_info, team); 479 } 480 481 list_for_each_entry(port, &team->port_list, list) { 482 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 483 484 s_info = &lb_port_priv->stats_info; 485 __lb_stats_info_refresh_prepare(s_info); 486 for_each_possible_cpu(i) { 487 pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); 488 stats = per_cpu_ptr(lb_port_priv->pcpu_stats, i); 489 __lb_one_cpu_stats_add(&s_info->stats, stats, 490 &pcpu_stats->syncp); 491 } 492 changed |= __lb_stats_info_refresh_check(s_info, team); 493 } 494 495 if (changed) 496 team_options_change_check(team); 497 498 schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 499 (lb_priv_ex->stats.refresh_interval * HZ) / 10); 500 501 mutex_unlock(&team->lock); 502 } 503 504 static int lb_stats_refresh_interval_get(struct team *team, 505 struct team_gsetter_ctx *ctx) 506 { 507 struct lb_priv *lb_priv = get_lb_priv(team); 508 509 ctx->data.u32_val = lb_priv->ex->stats.refresh_interval; 510 return 0; 511 } 512 513 static int lb_stats_refresh_interval_set(struct team *team, 514 struct team_gsetter_ctx *ctx) 515 { 516 struct lb_priv *lb_priv = get_lb_priv(team); 517 unsigned int interval; 518 519 interval = ctx->data.u32_val; 520 if (lb_priv->ex->stats.refresh_interval == interval) 521 return 0; 522 lb_priv->ex->stats.refresh_interval = interval; 523 if (interval) 524 schedule_delayed_work(&lb_priv->ex->stats.refresh_dw, 0); 525 else 526 cancel_delayed_work(&lb_priv->ex->stats.refresh_dw); 527 return 0; 528 } 529 530 static const struct team_option lb_options[] = { 531 { 532 .name = "bpf_hash_func", 533 .type = TEAM_OPTION_TYPE_BINARY, 534 .getter = lb_bpf_func_get, 535 .setter = lb_bpf_func_set, 536 }, 537 { 538 .name = "lb_tx_method", 539 .type = TEAM_OPTION_TYPE_STRING, 540 .getter = lb_tx_method_get, 541 .setter = lb_tx_method_set, 542 }, 543 { 544 .name = "lb_tx_hash_to_port_mapping", 545 .array_size = LB_TX_HASHTABLE_SIZE, 546 .type = TEAM_OPTION_TYPE_U32, 547 .init = lb_tx_hash_to_port_mapping_init, 548 .getter = lb_tx_hash_to_port_mapping_get, 549 .setter = lb_tx_hash_to_port_mapping_set, 550 }, 551 { 552 .name = "lb_hash_stats", 553 .array_size = LB_TX_HASHTABLE_SIZE, 554 .type = TEAM_OPTION_TYPE_BINARY, 555 .init = lb_hash_stats_init, 556 .getter = lb_hash_stats_get, 557 }, 558 { 559 .name = "lb_port_stats", 560 .per_port = true, 561 .type = TEAM_OPTION_TYPE_BINARY, 562 .init = lb_port_stats_init, 563 .getter = lb_port_stats_get, 564 }, 565 { 566 .name = "lb_stats_refresh_interval", 567 .type = TEAM_OPTION_TYPE_U32, 568 .getter = lb_stats_refresh_interval_get, 569 .setter = lb_stats_refresh_interval_set, 570 }, 571 }; 572 573 static int lb_init(struct team *team) 574 { 575 struct lb_priv *lb_priv = get_lb_priv(team); 576 lb_select_tx_port_func_t *func; 577 int i, err; 578 579 /* set default tx port selector */ 580 func = lb_select_tx_port_get_func("hash"); 581 BUG_ON(!func); 582 rcu_assign_pointer(lb_priv->select_tx_port_func, func); 583 584 lb_priv->ex = kzalloc(sizeof(*lb_priv->ex), GFP_KERNEL); 585 if (!lb_priv->ex) 586 return -ENOMEM; 587 lb_priv->ex->team = team; 588 589 lb_priv->pcpu_stats = alloc_percpu(struct lb_pcpu_stats); 590 if (!lb_priv->pcpu_stats) { 591 err = -ENOMEM; 592 goto err_alloc_pcpu_stats; 593 } 594 595 for_each_possible_cpu(i) { 596 struct lb_pcpu_stats *team_lb_stats; 597 team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); 598 u64_stats_init(&team_lb_stats->syncp); 599 } 600 601 602 INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh); 603 604 err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options)); 605 if (err) 606 goto err_options_register; 607 return 0; 608 609 err_options_register: 610 free_percpu(lb_priv->pcpu_stats); 611 err_alloc_pcpu_stats: 612 kfree(lb_priv->ex); 613 return err; 614 } 615 616 static void lb_exit(struct team *team) 617 { 618 struct lb_priv *lb_priv = get_lb_priv(team); 619 620 team_options_unregister(team, lb_options, 621 ARRAY_SIZE(lb_options)); 622 cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw); 623 free_percpu(lb_priv->pcpu_stats); 624 kfree(lb_priv->ex); 625 } 626 627 static int lb_port_enter(struct team *team, struct team_port *port) 628 { 629 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 630 631 lb_port_priv->pcpu_stats = alloc_percpu(struct lb_stats); 632 if (!lb_port_priv->pcpu_stats) 633 return -ENOMEM; 634 return 0; 635 } 636 637 static void lb_port_leave(struct team *team, struct team_port *port) 638 { 639 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 640 641 free_percpu(lb_port_priv->pcpu_stats); 642 } 643 644 static void lb_port_disabled(struct team *team, struct team_port *port) 645 { 646 lb_tx_hash_to_port_mapping_null_port(team, port); 647 } 648 649 static const struct team_mode_ops lb_mode_ops = { 650 .init = lb_init, 651 .exit = lb_exit, 652 .port_enter = lb_port_enter, 653 .port_leave = lb_port_leave, 654 .port_disabled = lb_port_disabled, 655 .transmit = lb_transmit, 656 }; 657 658 static const struct team_mode lb_mode = { 659 .kind = "loadbalance", 660 .owner = THIS_MODULE, 661 .priv_size = sizeof(struct lb_priv), 662 .port_priv_size = sizeof(struct lb_port_priv), 663 .ops = &lb_mode_ops, 664 .lag_tx_type = NETDEV_LAG_TX_TYPE_HASH, 665 }; 666 667 static int __init lb_init_module(void) 668 { 669 return team_mode_register(&lb_mode); 670 } 671 672 static void __exit lb_cleanup_module(void) 673 { 674 team_mode_unregister(&lb_mode); 675 } 676 677 module_init(lb_init_module); 678 module_exit(lb_cleanup_module); 679 680 MODULE_LICENSE("GPL v2"); 681 MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); 682 MODULE_DESCRIPTION("Load-balancing mode for team"); 683 MODULE_ALIAS("team-mode-loadbalance"); 684