1 /* 2 * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team 3 * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/types.h> 13 #include <linux/module.h> 14 #include <linux/init.h> 15 #include <linux/errno.h> 16 #include <linux/netdevice.h> 17 #include <linux/filter.h> 18 #include <linux/if_team.h> 19 20 struct lb_priv; 21 22 typedef struct team_port *lb_select_tx_port_func_t(struct team *, 23 struct lb_priv *, 24 struct sk_buff *, 25 unsigned char); 26 27 #define LB_TX_HASHTABLE_SIZE 256 /* hash is a char */ 28 29 struct lb_stats { 30 u64 tx_bytes; 31 }; 32 33 struct lb_pcpu_stats { 34 struct lb_stats hash_stats[LB_TX_HASHTABLE_SIZE]; 35 struct u64_stats_sync syncp; 36 }; 37 38 struct lb_stats_info { 39 struct lb_stats stats; 40 struct lb_stats last_stats; 41 struct team_option_inst_info *opt_inst_info; 42 }; 43 44 struct lb_port_mapping { 45 struct team_port __rcu *port; 46 struct team_option_inst_info *opt_inst_info; 47 }; 48 49 struct lb_priv_ex { 50 struct team *team; 51 struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE]; 52 struct sock_fprog *orig_fprog; 53 struct { 54 unsigned int refresh_interval; /* in tenths of second */ 55 struct delayed_work refresh_dw; 56 struct lb_stats_info info[LB_TX_HASHTABLE_SIZE]; 57 } stats; 58 }; 59 60 struct lb_priv { 61 struct sk_filter __rcu *fp; 62 lb_select_tx_port_func_t __rcu *select_tx_port_func; 63 struct lb_pcpu_stats __percpu *pcpu_stats; 64 struct lb_priv_ex *ex; /* priv extension */ 65 }; 66 67 static struct lb_priv *get_lb_priv(struct team *team) 68 { 69 return (struct lb_priv *) &team->mode_priv; 70 } 71 72 struct lb_port_priv { 73 struct lb_stats __percpu *pcpu_stats; 74 struct lb_stats_info stats_info; 75 }; 76 77 static struct lb_port_priv *get_lb_port_priv(struct team_port *port) 78 { 79 return (struct lb_port_priv *) &port->mode_priv; 80 } 81 82 #define LB_HTPM_PORT_BY_HASH(lp_priv, hash) \ 83 (lb_priv)->ex->tx_hash_to_port_mapping[hash].port 84 85 #define LB_HTPM_OPT_INST_INFO_BY_HASH(lp_priv, hash) \ 86 (lb_priv)->ex->tx_hash_to_port_mapping[hash].opt_inst_info 87 88 static void lb_tx_hash_to_port_mapping_null_port(struct team *team, 89 struct team_port *port) 90 { 91 struct lb_priv *lb_priv = get_lb_priv(team); 92 bool changed = false; 93 int i; 94 95 for (i = 0; i < LB_TX_HASHTABLE_SIZE; i++) { 96 struct lb_port_mapping *pm; 97 98 pm = &lb_priv->ex->tx_hash_to_port_mapping[i]; 99 if (rcu_access_pointer(pm->port) == port) { 100 RCU_INIT_POINTER(pm->port, NULL); 101 team_option_inst_set_change(pm->opt_inst_info); 102 changed = true; 103 } 104 } 105 if (changed) 106 team_options_change_check(team); 107 } 108 109 /* Basic tx selection based solely by hash */ 110 static struct team_port *lb_hash_select_tx_port(struct team *team, 111 struct lb_priv *lb_priv, 112 struct sk_buff *skb, 113 unsigned char hash) 114 { 115 int port_index; 116 117 port_index = hash % team->en_port_count; 118 return team_get_port_by_index_rcu(team, port_index); 119 } 120 121 /* Hash to port mapping select tx port */ 122 static struct team_port *lb_htpm_select_tx_port(struct team *team, 123 struct lb_priv *lb_priv, 124 struct sk_buff *skb, 125 unsigned char hash) 126 { 127 return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash)); 128 } 129 130 struct lb_select_tx_port { 131 char *name; 132 lb_select_tx_port_func_t *func; 133 }; 134 135 static const struct lb_select_tx_port lb_select_tx_port_list[] = { 136 { 137 .name = "hash", 138 .func = lb_hash_select_tx_port, 139 }, 140 { 141 .name = "hash_to_port_mapping", 142 .func = lb_htpm_select_tx_port, 143 }, 144 }; 145 #define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list) 146 147 static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t *func) 148 { 149 int i; 150 151 for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) { 152 const struct lb_select_tx_port *item; 153 154 item = &lb_select_tx_port_list[i]; 155 if (item->func == func) 156 return item->name; 157 } 158 return NULL; 159 } 160 161 static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name) 162 { 163 int i; 164 165 for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) { 166 const struct lb_select_tx_port *item; 167 168 item = &lb_select_tx_port_list[i]; 169 if (!strcmp(item->name, name)) 170 return item->func; 171 } 172 return NULL; 173 } 174 175 static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv, 176 struct sk_buff *skb) 177 { 178 struct sk_filter *fp; 179 uint32_t lhash; 180 unsigned char *c; 181 182 fp = rcu_dereference_bh(lb_priv->fp); 183 if (unlikely(!fp)) 184 return 0; 185 lhash = SK_RUN_FILTER(fp, skb); 186 c = (char *) &lhash; 187 return c[0] ^ c[1] ^ c[2] ^ c[3]; 188 } 189 190 static void lb_update_tx_stats(unsigned int tx_bytes, struct lb_priv *lb_priv, 191 struct lb_port_priv *lb_port_priv, 192 unsigned char hash) 193 { 194 struct lb_pcpu_stats *pcpu_stats; 195 struct lb_stats *port_stats; 196 struct lb_stats *hash_stats; 197 198 pcpu_stats = this_cpu_ptr(lb_priv->pcpu_stats); 199 port_stats = this_cpu_ptr(lb_port_priv->pcpu_stats); 200 hash_stats = &pcpu_stats->hash_stats[hash]; 201 u64_stats_update_begin(&pcpu_stats->syncp); 202 port_stats->tx_bytes += tx_bytes; 203 hash_stats->tx_bytes += tx_bytes; 204 u64_stats_update_end(&pcpu_stats->syncp); 205 } 206 207 static bool lb_transmit(struct team *team, struct sk_buff *skb) 208 { 209 struct lb_priv *lb_priv = get_lb_priv(team); 210 lb_select_tx_port_func_t *select_tx_port_func; 211 struct team_port *port; 212 unsigned char hash; 213 unsigned int tx_bytes = skb->len; 214 215 hash = lb_get_skb_hash(lb_priv, skb); 216 select_tx_port_func = rcu_dereference_bh(lb_priv->select_tx_port_func); 217 port = select_tx_port_func(team, lb_priv, skb, hash); 218 if (unlikely(!port)) 219 goto drop; 220 if (team_dev_queue_xmit(team, port, skb)) 221 return false; 222 lb_update_tx_stats(tx_bytes, lb_priv, get_lb_port_priv(port), hash); 223 return true; 224 225 drop: 226 dev_kfree_skb_any(skb); 227 return false; 228 } 229 230 static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx) 231 { 232 struct lb_priv *lb_priv = get_lb_priv(team); 233 234 if (!lb_priv->ex->orig_fprog) { 235 ctx->data.bin_val.len = 0; 236 ctx->data.bin_val.ptr = NULL; 237 return 0; 238 } 239 ctx->data.bin_val.len = lb_priv->ex->orig_fprog->len * 240 sizeof(struct sock_filter); 241 ctx->data.bin_val.ptr = lb_priv->ex->orig_fprog->filter; 242 return 0; 243 } 244 245 static int __fprog_create(struct sock_fprog **pfprog, u32 data_len, 246 const void *data) 247 { 248 struct sock_fprog *fprog; 249 struct sock_filter *filter = (struct sock_filter *) data; 250 251 if (data_len % sizeof(struct sock_filter)) 252 return -EINVAL; 253 fprog = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL); 254 if (!fprog) 255 return -ENOMEM; 256 fprog->filter = kmemdup(filter, data_len, GFP_KERNEL); 257 if (!fprog->filter) { 258 kfree(fprog); 259 return -ENOMEM; 260 } 261 fprog->len = data_len / sizeof(struct sock_filter); 262 *pfprog = fprog; 263 return 0; 264 } 265 266 static void __fprog_destroy(struct sock_fprog *fprog) 267 { 268 kfree(fprog->filter); 269 kfree(fprog); 270 } 271 272 static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) 273 { 274 struct lb_priv *lb_priv = get_lb_priv(team); 275 struct sk_filter *fp = NULL; 276 struct sk_filter *orig_fp; 277 struct sock_fprog *fprog = NULL; 278 int err; 279 280 if (ctx->data.bin_val.len) { 281 err = __fprog_create(&fprog, ctx->data.bin_val.len, 282 ctx->data.bin_val.ptr); 283 if (err) 284 return err; 285 err = sk_unattached_filter_create(&fp, fprog); 286 if (err) { 287 __fprog_destroy(fprog); 288 return err; 289 } 290 } 291 292 if (lb_priv->ex->orig_fprog) { 293 /* Clear old filter data */ 294 __fprog_destroy(lb_priv->ex->orig_fprog); 295 orig_fp = rcu_dereference_protected(lb_priv->fp, 296 lockdep_is_held(&team->lock)); 297 sk_unattached_filter_destroy(orig_fp); 298 } 299 300 rcu_assign_pointer(lb_priv->fp, fp); 301 lb_priv->ex->orig_fprog = fprog; 302 return 0; 303 } 304 305 static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx) 306 { 307 struct lb_priv *lb_priv = get_lb_priv(team); 308 lb_select_tx_port_func_t *func; 309 char *name; 310 311 func = rcu_dereference_protected(lb_priv->select_tx_port_func, 312 lockdep_is_held(&team->lock)); 313 name = lb_select_tx_port_get_name(func); 314 BUG_ON(!name); 315 ctx->data.str_val = name; 316 return 0; 317 } 318 319 static int lb_tx_method_set(struct team *team, struct team_gsetter_ctx *ctx) 320 { 321 struct lb_priv *lb_priv = get_lb_priv(team); 322 lb_select_tx_port_func_t *func; 323 324 func = lb_select_tx_port_get_func(ctx->data.str_val); 325 if (!func) 326 return -EINVAL; 327 rcu_assign_pointer(lb_priv->select_tx_port_func, func); 328 return 0; 329 } 330 331 static int lb_tx_hash_to_port_mapping_init(struct team *team, 332 struct team_option_inst_info *info) 333 { 334 struct lb_priv *lb_priv = get_lb_priv(team); 335 unsigned char hash = info->array_index; 336 337 LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) = info; 338 return 0; 339 } 340 341 static int lb_tx_hash_to_port_mapping_get(struct team *team, 342 struct team_gsetter_ctx *ctx) 343 { 344 struct lb_priv *lb_priv = get_lb_priv(team); 345 struct team_port *port; 346 unsigned char hash = ctx->info->array_index; 347 348 port = LB_HTPM_PORT_BY_HASH(lb_priv, hash); 349 ctx->data.u32_val = port ? port->dev->ifindex : 0; 350 return 0; 351 } 352 353 static int lb_tx_hash_to_port_mapping_set(struct team *team, 354 struct team_gsetter_ctx *ctx) 355 { 356 struct lb_priv *lb_priv = get_lb_priv(team); 357 struct team_port *port; 358 unsigned char hash = ctx->info->array_index; 359 360 list_for_each_entry(port, &team->port_list, list) { 361 if (ctx->data.u32_val == port->dev->ifindex && 362 team_port_enabled(port)) { 363 rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash), 364 port); 365 return 0; 366 } 367 } 368 return -ENODEV; 369 } 370 371 static int lb_hash_stats_init(struct team *team, 372 struct team_option_inst_info *info) 373 { 374 struct lb_priv *lb_priv = get_lb_priv(team); 375 unsigned char hash = info->array_index; 376 377 lb_priv->ex->stats.info[hash].opt_inst_info = info; 378 return 0; 379 } 380 381 static int lb_hash_stats_get(struct team *team, struct team_gsetter_ctx *ctx) 382 { 383 struct lb_priv *lb_priv = get_lb_priv(team); 384 unsigned char hash = ctx->info->array_index; 385 386 ctx->data.bin_val.ptr = &lb_priv->ex->stats.info[hash].stats; 387 ctx->data.bin_val.len = sizeof(struct lb_stats); 388 return 0; 389 } 390 391 static int lb_port_stats_init(struct team *team, 392 struct team_option_inst_info *info) 393 { 394 struct team_port *port = info->port; 395 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 396 397 lb_port_priv->stats_info.opt_inst_info = info; 398 return 0; 399 } 400 401 static int lb_port_stats_get(struct team *team, struct team_gsetter_ctx *ctx) 402 { 403 struct team_port *port = ctx->info->port; 404 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 405 406 ctx->data.bin_val.ptr = &lb_port_priv->stats_info.stats; 407 ctx->data.bin_val.len = sizeof(struct lb_stats); 408 return 0; 409 } 410 411 static void __lb_stats_info_refresh_prepare(struct lb_stats_info *s_info) 412 { 413 memcpy(&s_info->last_stats, &s_info->stats, sizeof(struct lb_stats)); 414 memset(&s_info->stats, 0, sizeof(struct lb_stats)); 415 } 416 417 static bool __lb_stats_info_refresh_check(struct lb_stats_info *s_info, 418 struct team *team) 419 { 420 if (memcmp(&s_info->last_stats, &s_info->stats, 421 sizeof(struct lb_stats))) { 422 team_option_inst_set_change(s_info->opt_inst_info); 423 return true; 424 } 425 return false; 426 } 427 428 static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats, 429 struct lb_stats *cpu_stats, 430 struct u64_stats_sync *syncp) 431 { 432 unsigned int start; 433 struct lb_stats tmp; 434 435 do { 436 start = u64_stats_fetch_begin_bh(syncp); 437 tmp.tx_bytes = cpu_stats->tx_bytes; 438 } while (u64_stats_fetch_retry_bh(syncp, start)); 439 acc_stats->tx_bytes += tmp.tx_bytes; 440 } 441 442 static void lb_stats_refresh(struct work_struct *work) 443 { 444 struct team *team; 445 struct lb_priv *lb_priv; 446 struct lb_priv_ex *lb_priv_ex; 447 struct lb_pcpu_stats *pcpu_stats; 448 struct lb_stats *stats; 449 struct lb_stats_info *s_info; 450 struct team_port *port; 451 bool changed = false; 452 int i; 453 int j; 454 455 lb_priv_ex = container_of(work, struct lb_priv_ex, 456 stats.refresh_dw.work); 457 458 team = lb_priv_ex->team; 459 lb_priv = get_lb_priv(team); 460 461 if (!mutex_trylock(&team->lock)) { 462 schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0); 463 return; 464 } 465 466 for (j = 0; j < LB_TX_HASHTABLE_SIZE; j++) { 467 s_info = &lb_priv->ex->stats.info[j]; 468 __lb_stats_info_refresh_prepare(s_info); 469 for_each_possible_cpu(i) { 470 pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); 471 stats = &pcpu_stats->hash_stats[j]; 472 __lb_one_cpu_stats_add(&s_info->stats, stats, 473 &pcpu_stats->syncp); 474 } 475 changed |= __lb_stats_info_refresh_check(s_info, team); 476 } 477 478 list_for_each_entry(port, &team->port_list, list) { 479 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 480 481 s_info = &lb_port_priv->stats_info; 482 __lb_stats_info_refresh_prepare(s_info); 483 for_each_possible_cpu(i) { 484 pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i); 485 stats = per_cpu_ptr(lb_port_priv->pcpu_stats, i); 486 __lb_one_cpu_stats_add(&s_info->stats, stats, 487 &pcpu_stats->syncp); 488 } 489 changed |= __lb_stats_info_refresh_check(s_info, team); 490 } 491 492 if (changed) 493 team_options_change_check(team); 494 495 schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 496 (lb_priv_ex->stats.refresh_interval * HZ) / 10); 497 498 mutex_unlock(&team->lock); 499 } 500 501 static int lb_stats_refresh_interval_get(struct team *team, 502 struct team_gsetter_ctx *ctx) 503 { 504 struct lb_priv *lb_priv = get_lb_priv(team); 505 506 ctx->data.u32_val = lb_priv->ex->stats.refresh_interval; 507 return 0; 508 } 509 510 static int lb_stats_refresh_interval_set(struct team *team, 511 struct team_gsetter_ctx *ctx) 512 { 513 struct lb_priv *lb_priv = get_lb_priv(team); 514 unsigned int interval; 515 516 interval = ctx->data.u32_val; 517 if (lb_priv->ex->stats.refresh_interval == interval) 518 return 0; 519 lb_priv->ex->stats.refresh_interval = interval; 520 if (interval) 521 schedule_delayed_work(&lb_priv->ex->stats.refresh_dw, 0); 522 else 523 cancel_delayed_work(&lb_priv->ex->stats.refresh_dw); 524 return 0; 525 } 526 527 static const struct team_option lb_options[] = { 528 { 529 .name = "bpf_hash_func", 530 .type = TEAM_OPTION_TYPE_BINARY, 531 .getter = lb_bpf_func_get, 532 .setter = lb_bpf_func_set, 533 }, 534 { 535 .name = "lb_tx_method", 536 .type = TEAM_OPTION_TYPE_STRING, 537 .getter = lb_tx_method_get, 538 .setter = lb_tx_method_set, 539 }, 540 { 541 .name = "lb_tx_hash_to_port_mapping", 542 .array_size = LB_TX_HASHTABLE_SIZE, 543 .type = TEAM_OPTION_TYPE_U32, 544 .init = lb_tx_hash_to_port_mapping_init, 545 .getter = lb_tx_hash_to_port_mapping_get, 546 .setter = lb_tx_hash_to_port_mapping_set, 547 }, 548 { 549 .name = "lb_hash_stats", 550 .array_size = LB_TX_HASHTABLE_SIZE, 551 .type = TEAM_OPTION_TYPE_BINARY, 552 .init = lb_hash_stats_init, 553 .getter = lb_hash_stats_get, 554 }, 555 { 556 .name = "lb_port_stats", 557 .per_port = true, 558 .type = TEAM_OPTION_TYPE_BINARY, 559 .init = lb_port_stats_init, 560 .getter = lb_port_stats_get, 561 }, 562 { 563 .name = "lb_stats_refresh_interval", 564 .type = TEAM_OPTION_TYPE_U32, 565 .getter = lb_stats_refresh_interval_get, 566 .setter = lb_stats_refresh_interval_set, 567 }, 568 }; 569 570 static int lb_init(struct team *team) 571 { 572 struct lb_priv *lb_priv = get_lb_priv(team); 573 lb_select_tx_port_func_t *func; 574 int err; 575 576 /* set default tx port selector */ 577 func = lb_select_tx_port_get_func("hash"); 578 BUG_ON(!func); 579 rcu_assign_pointer(lb_priv->select_tx_port_func, func); 580 581 lb_priv->ex = kzalloc(sizeof(*lb_priv->ex), GFP_KERNEL); 582 if (!lb_priv->ex) 583 return -ENOMEM; 584 lb_priv->ex->team = team; 585 586 lb_priv->pcpu_stats = alloc_percpu(struct lb_pcpu_stats); 587 if (!lb_priv->pcpu_stats) { 588 err = -ENOMEM; 589 goto err_alloc_pcpu_stats; 590 } 591 592 INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh); 593 594 err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options)); 595 if (err) 596 goto err_options_register; 597 return 0; 598 599 err_options_register: 600 free_percpu(lb_priv->pcpu_stats); 601 err_alloc_pcpu_stats: 602 kfree(lb_priv->ex); 603 return err; 604 } 605 606 static void lb_exit(struct team *team) 607 { 608 struct lb_priv *lb_priv = get_lb_priv(team); 609 610 team_options_unregister(team, lb_options, 611 ARRAY_SIZE(lb_options)); 612 cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw); 613 free_percpu(lb_priv->pcpu_stats); 614 kfree(lb_priv->ex); 615 } 616 617 static int lb_port_enter(struct team *team, struct team_port *port) 618 { 619 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 620 621 lb_port_priv->pcpu_stats = alloc_percpu(struct lb_stats); 622 if (!lb_port_priv->pcpu_stats) 623 return -ENOMEM; 624 return 0; 625 } 626 627 static void lb_port_leave(struct team *team, struct team_port *port) 628 { 629 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port); 630 631 free_percpu(lb_port_priv->pcpu_stats); 632 } 633 634 static void lb_port_disabled(struct team *team, struct team_port *port) 635 { 636 lb_tx_hash_to_port_mapping_null_port(team, port); 637 } 638 639 static const struct team_mode_ops lb_mode_ops = { 640 .init = lb_init, 641 .exit = lb_exit, 642 .port_enter = lb_port_enter, 643 .port_leave = lb_port_leave, 644 .port_disabled = lb_port_disabled, 645 .transmit = lb_transmit, 646 }; 647 648 static const struct team_mode lb_mode = { 649 .kind = "loadbalance", 650 .owner = THIS_MODULE, 651 .priv_size = sizeof(struct lb_priv), 652 .port_priv_size = sizeof(struct lb_port_priv), 653 .ops = &lb_mode_ops, 654 }; 655 656 static int __init lb_init_module(void) 657 { 658 return team_mode_register(&lb_mode); 659 } 660 661 static void __exit lb_cleanup_module(void) 662 { 663 team_mode_unregister(&lb_mode); 664 } 665 666 module_init(lb_init_module); 667 module_exit(lb_cleanup_module); 668 669 MODULE_LICENSE("GPL v2"); 670 MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>"); 671 MODULE_DESCRIPTION("Load-balancing mode for team"); 672 MODULE_ALIAS("team-mode-loadbalance"); 673