1 /* 2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/netdevice.h> 34 #include <net/bonding.h> 35 #include <linux/mlx5/driver.h> 36 #include <linux/mlx5/eswitch.h> 37 #include <linux/mlx5/vport.h> 38 #include "lib/devcom.h" 39 #include "mlx5_core.h" 40 #include "eswitch.h" 41 #include "esw/acl/ofld.h" 42 #include "lag.h" 43 #include "mp.h" 44 #include "mpesw.h" 45 46 enum { 47 MLX5_LAG_EGRESS_PORT_1 = 1, 48 MLX5_LAG_EGRESS_PORT_2, 49 }; 50 51 /* General purpose, use for short periods of time. 52 * Beware of lock dependencies (preferably, no locks should be acquired 53 * under it). 54 */ 55 static DEFINE_SPINLOCK(lag_lock); 56 57 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) 58 { 59 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) 60 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT; 61 62 if (mode == MLX5_LAG_MODE_MPESW) 63 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW; 64 65 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY; 66 } 67 68 static u8 lag_active_port_bits(struct mlx5_lag *ldev) 69 { 70 u8 enabled_ports[MLX5_MAX_PORTS] = {}; 71 u8 active_port = 0; 72 int num_enabled; 73 int idx; 74 75 mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports, 76 &num_enabled); 77 for (idx = 0; idx < num_enabled; idx++) 78 active_port |= BIT_MASK(enabled_ports[idx]); 79 80 return active_port; 81 } 82 83 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, 84 unsigned long flags) 85 { 86 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, 87 &flags); 88 int port_sel_mode = get_port_sel_mode(mode, flags); 89 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; 90 void *lag_ctx; 91 92 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); 93 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); 94 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); 95 96 switch (port_sel_mode) { 97 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: 98 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); 99 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); 100 break; 101 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: 102 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass)) 103 break; 104 105 MLX5_SET(lagc, lag_ctx, active_port, 106 lag_active_port_bits(mlx5_lag_dev(dev))); 107 break; 108 default: 109 break; 110 } 111 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode); 112 113 return mlx5_cmd_exec_in(dev, create_lag, in); 114 } 115 116 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports, 117 u8 *ports) 118 { 119 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; 120 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); 121 122 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); 123 MLX5_SET(modify_lag_in, in, field_select, 0x1); 124 125 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); 126 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); 127 128 return mlx5_cmd_exec_in(dev, modify_lag, in); 129 } 130 131 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) 132 { 133 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {}; 134 135 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); 136 137 return mlx5_cmd_exec_in(dev, create_vport_lag, in); 138 } 139 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); 140 141 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) 142 { 143 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {}; 144 145 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); 146 147 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in); 148 } 149 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); 150 151 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, 152 u8 *ports, int *num_disabled) 153 { 154 int i; 155 156 *num_disabled = 0; 157 for (i = 0; i < num_ports; i++) { 158 if (!tracker->netdev_state[i].tx_enabled || 159 !tracker->netdev_state[i].link_up) 160 ports[(*num_disabled)++] = i; 161 } 162 } 163 164 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, 165 u8 *ports, int *num_enabled) 166 { 167 int i; 168 169 *num_enabled = 0; 170 for (i = 0; i < num_ports; i++) { 171 if (tracker->netdev_state[i].tx_enabled && 172 tracker->netdev_state[i].link_up) 173 ports[(*num_enabled)++] = i; 174 } 175 176 if (*num_enabled == 0) 177 mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); 178 } 179 180 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, 181 struct mlx5_lag *ldev, 182 struct lag_tracker *tracker, 183 unsigned long flags) 184 { 185 char buf[MLX5_MAX_PORTS * 10 + 1] = {}; 186 u8 enabled_ports[MLX5_MAX_PORTS] = {}; 187 int written = 0; 188 int num_enabled; 189 int idx; 190 int err; 191 int i; 192 int j; 193 194 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { 195 mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, 196 &num_enabled); 197 for (i = 0; i < num_enabled; i++) { 198 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); 199 if (err != 3) 200 return; 201 written += err; 202 } 203 buf[written - 2] = 0; 204 mlx5_core_info(dev, "lag map active ports: %s\n", buf); 205 } else { 206 for (i = 0; i < ldev->ports; i++) { 207 for (j = 0; j < ldev->buckets; j++) { 208 idx = i * ldev->buckets + j; 209 err = scnprintf(buf + written, 10, 210 " port %d:%d", i + 1, ldev->v2p_map[idx]); 211 if (err != 9) 212 return; 213 written += err; 214 } 215 } 216 mlx5_core_info(dev, "lag map:%s\n", buf); 217 } 218 } 219 220 static int mlx5_lag_netdev_event(struct notifier_block *this, 221 unsigned long event, void *ptr); 222 static void mlx5_do_bond_work(struct work_struct *work); 223 224 static void mlx5_ldev_free(struct kref *ref) 225 { 226 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); 227 228 if (ldev->nb.notifier_call) 229 unregister_netdevice_notifier_net(&init_net, &ldev->nb); 230 mlx5_lag_mp_cleanup(ldev); 231 cancel_delayed_work_sync(&ldev->bond_work); 232 destroy_workqueue(ldev->wq); 233 mutex_destroy(&ldev->lock); 234 kfree(ldev); 235 } 236 237 static void mlx5_ldev_put(struct mlx5_lag *ldev) 238 { 239 kref_put(&ldev->ref, mlx5_ldev_free); 240 } 241 242 static void mlx5_ldev_get(struct mlx5_lag *ldev) 243 { 244 kref_get(&ldev->ref); 245 } 246 247 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) 248 { 249 struct mlx5_lag *ldev; 250 int err; 251 252 ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); 253 if (!ldev) 254 return NULL; 255 256 ldev->wq = create_singlethread_workqueue("mlx5_lag"); 257 if (!ldev->wq) { 258 kfree(ldev); 259 return NULL; 260 } 261 262 kref_init(&ldev->ref); 263 mutex_init(&ldev->lock); 264 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); 265 266 ldev->nb.notifier_call = mlx5_lag_netdev_event; 267 if (register_netdevice_notifier_net(&init_net, &ldev->nb)) { 268 ldev->nb.notifier_call = NULL; 269 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); 270 } 271 ldev->mode = MLX5_LAG_MODE_NONE; 272 273 err = mlx5_lag_mp_init(ldev); 274 if (err) 275 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", 276 err); 277 278 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); 279 ldev->buckets = 1; 280 281 return ldev; 282 } 283 284 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, 285 struct net_device *ndev) 286 { 287 int i; 288 289 for (i = 0; i < ldev->ports; i++) 290 if (ldev->pf[i].netdev == ndev) 291 return i; 292 293 return -ENOENT; 294 } 295 296 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) 297 { 298 return ldev->mode == MLX5_LAG_MODE_ROCE; 299 } 300 301 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) 302 { 303 return ldev->mode == MLX5_LAG_MODE_SRIOV; 304 } 305 306 /* Create a mapping between steering slots and active ports. 307 * As we have ldev->buckets slots per port first assume the native 308 * mapping should be used. 309 * If there are ports that are disabled fill the relevant slots 310 * with mapping that points to active ports. 311 */ 312 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, 313 u8 num_ports, 314 u8 buckets, 315 u8 *ports) 316 { 317 int disabled[MLX5_MAX_PORTS] = {}; 318 int enabled[MLX5_MAX_PORTS] = {}; 319 int disabled_ports_num = 0; 320 int enabled_ports_num = 0; 321 int idx; 322 u32 rand; 323 int i; 324 int j; 325 326 for (i = 0; i < num_ports; i++) { 327 if (tracker->netdev_state[i].tx_enabled && 328 tracker->netdev_state[i].link_up) 329 enabled[enabled_ports_num++] = i; 330 else 331 disabled[disabled_ports_num++] = i; 332 } 333 334 /* Use native mapping by default where each port's buckets 335 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc 336 */ 337 for (i = 0; i < num_ports; i++) 338 for (j = 0; j < buckets; j++) { 339 idx = i * buckets + j; 340 ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; 341 } 342 343 /* If all ports are disabled/enabled keep native mapping */ 344 if (enabled_ports_num == num_ports || 345 disabled_ports_num == num_ports) 346 return; 347 348 /* Go over the disabled ports and for each assign a random active port */ 349 for (i = 0; i < disabled_ports_num; i++) { 350 for (j = 0; j < buckets; j++) { 351 get_random_bytes(&rand, 4); 352 ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; 353 } 354 } 355 } 356 357 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) 358 { 359 int i; 360 361 for (i = 0; i < ldev->ports; i++) 362 if (ldev->pf[i].has_drop) 363 return true; 364 return false; 365 } 366 367 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) 368 { 369 int i; 370 371 for (i = 0; i < ldev->ports; i++) { 372 if (!ldev->pf[i].has_drop) 373 continue; 374 375 mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch, 376 MLX5_VPORT_UPLINK); 377 ldev->pf[i].has_drop = false; 378 } 379 } 380 381 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, 382 struct lag_tracker *tracker) 383 { 384 u8 disabled_ports[MLX5_MAX_PORTS] = {}; 385 struct mlx5_core_dev *dev; 386 int disabled_index; 387 int num_disabled; 388 int err; 389 int i; 390 391 /* First delete the current drop rule so there won't be any dropped 392 * packets 393 */ 394 mlx5_lag_drop_rule_cleanup(ldev); 395 396 if (!ldev->tracker.has_inactive) 397 return; 398 399 mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled); 400 401 for (i = 0; i < num_disabled; i++) { 402 disabled_index = disabled_ports[i]; 403 dev = ldev->pf[disabled_index].dev; 404 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, 405 MLX5_VPORT_UPLINK); 406 if (!err) 407 ldev->pf[disabled_index].has_drop = true; 408 else 409 mlx5_core_err(dev, 410 "Failed to create lag drop rule, error: %d", err); 411 } 412 } 413 414 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports) 415 { 416 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; 417 void *lag_ctx; 418 419 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); 420 421 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); 422 MLX5_SET(modify_lag_in, in, field_select, 0x2); 423 424 MLX5_SET(lagc, lag_ctx, active_port, ports); 425 426 return mlx5_cmd_exec_in(dev, modify_lag, in); 427 } 428 429 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) 430 { 431 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 432 u8 active_ports; 433 int ret; 434 435 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { 436 ret = mlx5_lag_port_sel_modify(ldev, ports); 437 if (ret || 438 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass)) 439 return ret; 440 441 active_ports = lag_active_port_bits(ldev); 442 443 return mlx5_cmd_modify_active_port(dev0, active_ports); 444 } 445 return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); 446 } 447 448 void mlx5_modify_lag(struct mlx5_lag *ldev, 449 struct lag_tracker *tracker) 450 { 451 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; 452 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 453 int idx; 454 int err; 455 int i; 456 int j; 457 458 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports); 459 460 for (i = 0; i < ldev->ports; i++) { 461 for (j = 0; j < ldev->buckets; j++) { 462 idx = i * ldev->buckets + j; 463 if (ports[idx] == ldev->v2p_map[idx]) 464 continue; 465 err = _mlx5_modify_lag(ldev, ports); 466 if (err) { 467 mlx5_core_err(dev0, 468 "Failed to modify LAG (%d)\n", 469 err); 470 return; 471 } 472 memcpy(ldev->v2p_map, ports, sizeof(ports)); 473 474 mlx5_lag_print_mapping(dev0, ldev, tracker, 475 ldev->mode_flags); 476 break; 477 } 478 } 479 480 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && 481 !(ldev->mode == MLX5_LAG_MODE_ROCE)) 482 mlx5_lag_drop_rule_setup(ldev, tracker); 483 } 484 485 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, 486 unsigned long *flags) 487 { 488 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 489 490 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { 491 if (ldev->ports > 2) 492 return -EINVAL; 493 return 0; 494 } 495 496 if (ldev->ports > 2) 497 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; 498 499 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); 500 501 return 0; 502 } 503 504 static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, 505 struct lag_tracker *tracker, 506 enum mlx5_lag_mode mode, 507 unsigned long *flags) 508 { 509 struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; 510 511 if (mode == MLX5_LAG_MODE_MPESW) 512 return; 513 514 if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && 515 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) { 516 if (ldev->ports > 2) 517 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; 518 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); 519 } 520 } 521 522 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, 523 struct lag_tracker *tracker, bool shared_fdb, 524 unsigned long *flags) 525 { 526 bool roce_lag = mode == MLX5_LAG_MODE_ROCE; 527 528 *flags = 0; 529 if (shared_fdb) { 530 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags); 531 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); 532 } 533 534 if (mode == MLX5_LAG_MODE_MPESW) 535 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); 536 537 if (roce_lag) 538 return mlx5_lag_set_port_sel_mode_roce(ldev, flags); 539 540 mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags); 541 return 0; 542 } 543 544 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) 545 { 546 int port_sel_mode = get_port_sel_mode(mode, flags); 547 548 switch (port_sel_mode) { 549 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity"; 550 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash"; 551 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw"; 552 default: return "invalid"; 553 } 554 } 555 556 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) 557 { 558 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 559 struct mlx5_eswitch *master_esw = dev0->priv.eswitch; 560 int err; 561 int i; 562 563 for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { 564 struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; 565 566 err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, 567 slave_esw, ldev->ports); 568 if (err) 569 goto err; 570 } 571 return 0; 572 err: 573 for (; i > MLX5_LAG_P1; i--) 574 mlx5_eswitch_offloads_single_fdb_del_one(master_esw, 575 ldev->pf[i].dev->priv.eswitch); 576 return err; 577 } 578 579 static int mlx5_create_lag(struct mlx5_lag *ldev, 580 struct lag_tracker *tracker, 581 enum mlx5_lag_mode mode, 582 unsigned long flags) 583 { 584 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); 585 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 586 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; 587 int err; 588 589 if (tracker) 590 mlx5_lag_print_mapping(dev0, ldev, tracker, flags); 591 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", 592 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); 593 594 err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags); 595 if (err) { 596 mlx5_core_err(dev0, 597 "Failed to create LAG (%d)\n", 598 err); 599 return err; 600 } 601 602 if (shared_fdb) { 603 err = mlx5_lag_create_single_fdb(ldev); 604 if (err) 605 mlx5_core_err(dev0, "Can't enable single FDB mode\n"); 606 else 607 mlx5_core_info(dev0, "Operation mode is single FDB\n"); 608 } 609 610 if (err) { 611 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); 612 if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) 613 mlx5_core_err(dev0, 614 "Failed to deactivate RoCE LAG; driver restart required\n"); 615 } 616 617 return err; 618 } 619 620 int mlx5_activate_lag(struct mlx5_lag *ldev, 621 struct lag_tracker *tracker, 622 enum mlx5_lag_mode mode, 623 bool shared_fdb) 624 { 625 bool roce_lag = mode == MLX5_LAG_MODE_ROCE; 626 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 627 unsigned long flags = 0; 628 int err; 629 630 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); 631 if (err) 632 return err; 633 634 if (mode != MLX5_LAG_MODE_MPESW) { 635 mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); 636 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { 637 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, 638 ldev->v2p_map); 639 if (err) { 640 mlx5_core_err(dev0, 641 "Failed to create LAG port selection(%d)\n", 642 err); 643 return err; 644 } 645 } 646 } 647 648 err = mlx5_create_lag(ldev, tracker, mode, flags); 649 if (err) { 650 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) 651 mlx5_lag_port_sel_destroy(ldev); 652 if (roce_lag) 653 mlx5_core_err(dev0, 654 "Failed to activate RoCE LAG\n"); 655 else 656 mlx5_core_err(dev0, 657 "Failed to activate VF LAG\n" 658 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); 659 return err; 660 } 661 662 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && 663 !roce_lag) 664 mlx5_lag_drop_rule_setup(ldev, tracker); 665 666 ldev->mode = mode; 667 ldev->mode_flags = flags; 668 return 0; 669 } 670 671 int mlx5_deactivate_lag(struct mlx5_lag *ldev) 672 { 673 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 674 struct mlx5_eswitch *master_esw = dev0->priv.eswitch; 675 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; 676 bool roce_lag = __mlx5_lag_is_roce(ldev); 677 unsigned long flags = ldev->mode_flags; 678 int err; 679 int i; 680 681 ldev->mode = MLX5_LAG_MODE_NONE; 682 ldev->mode_flags = 0; 683 mlx5_lag_mp_reset(ldev); 684 685 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { 686 for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) 687 mlx5_eswitch_offloads_single_fdb_del_one(master_esw, 688 ldev->pf[i].dev->priv.eswitch); 689 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); 690 } 691 692 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); 693 err = mlx5_cmd_exec_in(dev0, destroy_lag, in); 694 if (err) { 695 if (roce_lag) { 696 mlx5_core_err(dev0, 697 "Failed to deactivate RoCE LAG; driver restart required\n"); 698 } else { 699 mlx5_core_err(dev0, 700 "Failed to deactivate VF LAG; driver restart required\n" 701 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); 702 } 703 return err; 704 } 705 706 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) 707 mlx5_lag_port_sel_destroy(ldev); 708 if (mlx5_lag_has_drop_rule(ldev)) 709 mlx5_lag_drop_rule_cleanup(ldev); 710 711 return 0; 712 } 713 714 #define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 4 715 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) 716 { 717 #ifdef CONFIG_MLX5_ESWITCH 718 struct mlx5_core_dev *dev; 719 u8 mode; 720 #endif 721 int i; 722 723 for (i = 0; i < ldev->ports; i++) 724 if (!ldev->pf[i].dev) 725 return false; 726 727 #ifdef CONFIG_MLX5_ESWITCH 728 for (i = 0; i < ldev->ports; i++) { 729 dev = ldev->pf[i].dev; 730 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) 731 return false; 732 } 733 734 dev = ldev->pf[MLX5_LAG_P1].dev; 735 mode = mlx5_eswitch_mode(dev); 736 for (i = 0; i < ldev->ports; i++) 737 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) 738 return false; 739 740 if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports > MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) 741 return false; 742 #else 743 for (i = 0; i < ldev->ports; i++) 744 if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) 745 return false; 746 #endif 747 return true; 748 } 749 750 void mlx5_lag_add_devices(struct mlx5_lag *ldev) 751 { 752 int i; 753 754 for (i = 0; i < ldev->ports; i++) { 755 if (!ldev->pf[i].dev) 756 continue; 757 758 if (ldev->pf[i].dev->priv.flags & 759 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) 760 continue; 761 762 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 763 mlx5_rescan_drivers_locked(ldev->pf[i].dev); 764 } 765 } 766 767 void mlx5_lag_remove_devices(struct mlx5_lag *ldev) 768 { 769 int i; 770 771 for (i = 0; i < ldev->ports; i++) { 772 if (!ldev->pf[i].dev) 773 continue; 774 775 if (ldev->pf[i].dev->priv.flags & 776 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) 777 continue; 778 779 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 780 mlx5_rescan_drivers_locked(ldev->pf[i].dev); 781 } 782 } 783 784 void mlx5_disable_lag(struct mlx5_lag *ldev) 785 { 786 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); 787 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 788 bool roce_lag; 789 int err; 790 int i; 791 792 roce_lag = __mlx5_lag_is_roce(ldev); 793 794 if (shared_fdb) { 795 mlx5_lag_remove_devices(ldev); 796 } else if (roce_lag) { 797 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) { 798 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 799 mlx5_rescan_drivers_locked(dev0); 800 } 801 for (i = 1; i < ldev->ports; i++) 802 mlx5_nic_vport_disable_roce(ldev->pf[i].dev); 803 } 804 805 err = mlx5_deactivate_lag(ldev); 806 if (err) 807 return; 808 809 if (shared_fdb || roce_lag) 810 mlx5_lag_add_devices(ldev); 811 812 if (shared_fdb) 813 for (i = 0; i < ldev->ports; i++) 814 if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) 815 mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); 816 } 817 818 static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) 819 { 820 struct mlx5_core_dev *dev; 821 int i; 822 823 for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { 824 dev = ldev->pf[i].dev; 825 if (is_mdev_switchdev_mode(dev) && 826 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && 827 MLX5_CAP_GEN(dev, lag_native_fdb_selection) && 828 MLX5_CAP_ESW(dev, root_ft_on_other_esw) && 829 mlx5_eswitch_get_npeers(dev->priv.eswitch) == 830 MLX5_CAP_GEN(dev, num_lag_ports) - 1) 831 continue; 832 return false; 833 } 834 835 dev = ldev->pf[MLX5_LAG_P1].dev; 836 if (is_mdev_switchdev_mode(dev) && 837 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && 838 mlx5_devcom_comp_is_ready(dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS) && 839 MLX5_CAP_ESW(dev, esw_shared_ingress_acl) && 840 mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1) 841 return true; 842 843 return false; 844 } 845 846 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) 847 { 848 bool roce_lag = true; 849 int i; 850 851 for (i = 0; i < ldev->ports; i++) 852 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); 853 854 #ifdef CONFIG_MLX5_ESWITCH 855 for (i = 0; i < ldev->ports; i++) 856 roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); 857 #endif 858 859 return roce_lag; 860 } 861 862 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond) 863 { 864 return do_bond && __mlx5_lag_is_active(ldev) && 865 ldev->mode != MLX5_LAG_MODE_MPESW; 866 } 867 868 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) 869 { 870 return !do_bond && __mlx5_lag_is_active(ldev) && 871 ldev->mode != MLX5_LAG_MODE_MPESW; 872 } 873 874 static void mlx5_do_bond(struct mlx5_lag *ldev) 875 { 876 struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; 877 struct lag_tracker tracker = { }; 878 bool do_bond, roce_lag; 879 int err; 880 int i; 881 882 if (!mlx5_lag_is_ready(ldev)) { 883 do_bond = false; 884 } else { 885 /* VF LAG is in multipath mode, ignore bond change requests */ 886 if (mlx5_lag_is_multipath(dev0)) 887 return; 888 889 tracker = ldev->tracker; 890 891 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); 892 } 893 894 if (do_bond && !__mlx5_lag_is_active(ldev)) { 895 bool shared_fdb = mlx5_shared_fdb_supported(ldev); 896 897 roce_lag = mlx5_lag_is_roce_lag(ldev); 898 899 if (shared_fdb || roce_lag) 900 mlx5_lag_remove_devices(ldev); 901 902 err = mlx5_activate_lag(ldev, &tracker, 903 roce_lag ? MLX5_LAG_MODE_ROCE : 904 MLX5_LAG_MODE_SRIOV, 905 shared_fdb); 906 if (err) { 907 if (shared_fdb || roce_lag) 908 mlx5_lag_add_devices(ldev); 909 910 return; 911 } else if (roce_lag) { 912 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 913 mlx5_rescan_drivers_locked(dev0); 914 for (i = 1; i < ldev->ports; i++) 915 mlx5_nic_vport_enable_roce(ldev->pf[i].dev); 916 } else if (shared_fdb) { 917 int i; 918 919 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 920 mlx5_rescan_drivers_locked(dev0); 921 922 for (i = 0; i < ldev->ports; i++) { 923 err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); 924 if (err) 925 break; 926 } 927 928 if (err) { 929 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 930 mlx5_rescan_drivers_locked(dev0); 931 mlx5_deactivate_lag(ldev); 932 mlx5_lag_add_devices(ldev); 933 for (i = 0; i < ldev->ports; i++) 934 mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); 935 mlx5_core_err(dev0, "Failed to enable lag\n"); 936 return; 937 } 938 } 939 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) { 940 mlx5_modify_lag(ldev, &tracker); 941 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) { 942 mlx5_disable_lag(ldev); 943 } 944 } 945 946 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) 947 { 948 queue_delayed_work(ldev->wq, &ldev->bond_work, delay); 949 } 950 951 static void mlx5_do_bond_work(struct work_struct *work) 952 { 953 struct delayed_work *delayed_work = to_delayed_work(work); 954 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, 955 bond_work); 956 int status; 957 958 status = mlx5_dev_list_trylock(); 959 if (!status) { 960 mlx5_queue_bond_work(ldev, HZ); 961 return; 962 } 963 964 mutex_lock(&ldev->lock); 965 if (ldev->mode_changes_in_progress) { 966 mutex_unlock(&ldev->lock); 967 mlx5_dev_list_unlock(); 968 mlx5_queue_bond_work(ldev, HZ); 969 return; 970 } 971 972 mlx5_do_bond(ldev); 973 mutex_unlock(&ldev->lock); 974 mlx5_dev_list_unlock(); 975 } 976 977 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, 978 struct lag_tracker *tracker, 979 struct netdev_notifier_changeupper_info *info) 980 { 981 struct net_device *upper = info->upper_dev, *ndev_tmp; 982 struct netdev_lag_upper_info *lag_upper_info = NULL; 983 bool is_bonded, is_in_lag, mode_supported; 984 bool has_inactive = 0; 985 struct slave *slave; 986 u8 bond_status = 0; 987 int num_slaves = 0; 988 int changed = 0; 989 int idx; 990 991 if (!netif_is_lag_master(upper)) 992 return 0; 993 994 if (info->linking) 995 lag_upper_info = info->upper_info; 996 997 /* The event may still be of interest if the slave does not belong to 998 * us, but is enslaved to a master which has one or more of our netdevs 999 * as slaves (e.g., if a new slave is added to a master that bonds two 1000 * of our netdevs, we should unbond). 1001 */ 1002 rcu_read_lock(); 1003 for_each_netdev_in_bond_rcu(upper, ndev_tmp) { 1004 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); 1005 if (idx >= 0) { 1006 slave = bond_slave_get_rcu(ndev_tmp); 1007 if (slave) 1008 has_inactive |= bond_is_slave_inactive(slave); 1009 bond_status |= (1 << idx); 1010 } 1011 1012 num_slaves++; 1013 } 1014 rcu_read_unlock(); 1015 1016 /* None of this lagdev's netdevs are slaves of this master. */ 1017 if (!(bond_status & GENMASK(ldev->ports - 1, 0))) 1018 return 0; 1019 1020 if (lag_upper_info) { 1021 tracker->tx_type = lag_upper_info->tx_type; 1022 tracker->hash_type = lag_upper_info->hash_type; 1023 } 1024 1025 tracker->has_inactive = has_inactive; 1026 /* Determine bonding status: 1027 * A device is considered bonded if both its physical ports are slaves 1028 * of the same lag master, and only them. 1029 */ 1030 is_in_lag = num_slaves == ldev->ports && 1031 bond_status == GENMASK(ldev->ports - 1, 0); 1032 1033 /* Lag mode must be activebackup or hash. */ 1034 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || 1035 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; 1036 1037 is_bonded = is_in_lag && mode_supported; 1038 if (tracker->is_bonded != is_bonded) { 1039 tracker->is_bonded = is_bonded; 1040 changed = 1; 1041 } 1042 1043 if (!is_in_lag) 1044 return changed; 1045 1046 if (!mlx5_lag_is_ready(ldev)) 1047 NL_SET_ERR_MSG_MOD(info->info.extack, 1048 "Can't activate LAG offload, PF is configured with more than 64 VFs"); 1049 else if (!mode_supported) 1050 NL_SET_ERR_MSG_MOD(info->info.extack, 1051 "Can't activate LAG offload, TX type isn't supported"); 1052 1053 return changed; 1054 } 1055 1056 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, 1057 struct lag_tracker *tracker, 1058 struct net_device *ndev, 1059 struct netdev_notifier_changelowerstate_info *info) 1060 { 1061 struct netdev_lag_lower_state_info *lag_lower_info; 1062 int idx; 1063 1064 if (!netif_is_lag_port(ndev)) 1065 return 0; 1066 1067 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); 1068 if (idx < 0) 1069 return 0; 1070 1071 /* This information is used to determine virtual to physical 1072 * port mapping. 1073 */ 1074 lag_lower_info = info->lower_state_info; 1075 if (!lag_lower_info) 1076 return 0; 1077 1078 tracker->netdev_state[idx] = *lag_lower_info; 1079 1080 return 1; 1081 } 1082 1083 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev, 1084 struct lag_tracker *tracker, 1085 struct net_device *ndev) 1086 { 1087 struct net_device *ndev_tmp; 1088 struct slave *slave; 1089 bool has_inactive = 0; 1090 int idx; 1091 1092 if (!netif_is_lag_master(ndev)) 1093 return 0; 1094 1095 rcu_read_lock(); 1096 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) { 1097 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); 1098 if (idx < 0) 1099 continue; 1100 1101 slave = bond_slave_get_rcu(ndev_tmp); 1102 if (slave) 1103 has_inactive |= bond_is_slave_inactive(slave); 1104 } 1105 rcu_read_unlock(); 1106 1107 if (tracker->has_inactive == has_inactive) 1108 return 0; 1109 1110 tracker->has_inactive = has_inactive; 1111 1112 return 1; 1113 } 1114 1115 /* this handler is always registered to netdev events */ 1116 static int mlx5_lag_netdev_event(struct notifier_block *this, 1117 unsigned long event, void *ptr) 1118 { 1119 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 1120 struct lag_tracker tracker; 1121 struct mlx5_lag *ldev; 1122 int changed = 0; 1123 1124 if (event != NETDEV_CHANGEUPPER && 1125 event != NETDEV_CHANGELOWERSTATE && 1126 event != NETDEV_CHANGEINFODATA) 1127 return NOTIFY_DONE; 1128 1129 ldev = container_of(this, struct mlx5_lag, nb); 1130 1131 tracker = ldev->tracker; 1132 1133 switch (event) { 1134 case NETDEV_CHANGEUPPER: 1135 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr); 1136 break; 1137 case NETDEV_CHANGELOWERSTATE: 1138 changed = mlx5_handle_changelowerstate_event(ldev, &tracker, 1139 ndev, ptr); 1140 break; 1141 case NETDEV_CHANGEINFODATA: 1142 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev); 1143 break; 1144 } 1145 1146 ldev->tracker = tracker; 1147 1148 if (changed) 1149 mlx5_queue_bond_work(ldev, 0); 1150 1151 return NOTIFY_DONE; 1152 } 1153 1154 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, 1155 struct mlx5_core_dev *dev, 1156 struct net_device *netdev) 1157 { 1158 unsigned int fn = mlx5_get_dev_index(dev); 1159 unsigned long flags; 1160 1161 if (fn >= ldev->ports) 1162 return; 1163 1164 spin_lock_irqsave(&lag_lock, flags); 1165 ldev->pf[fn].netdev = netdev; 1166 ldev->tracker.netdev_state[fn].link_up = 0; 1167 ldev->tracker.netdev_state[fn].tx_enabled = 0; 1168 spin_unlock_irqrestore(&lag_lock, flags); 1169 } 1170 1171 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, 1172 struct net_device *netdev) 1173 { 1174 unsigned long flags; 1175 int i; 1176 1177 spin_lock_irqsave(&lag_lock, flags); 1178 for (i = 0; i < ldev->ports; i++) { 1179 if (ldev->pf[i].netdev == netdev) { 1180 ldev->pf[i].netdev = NULL; 1181 break; 1182 } 1183 } 1184 spin_unlock_irqrestore(&lag_lock, flags); 1185 } 1186 1187 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, 1188 struct mlx5_core_dev *dev) 1189 { 1190 unsigned int fn = mlx5_get_dev_index(dev); 1191 1192 if (fn >= ldev->ports) 1193 return; 1194 1195 ldev->pf[fn].dev = dev; 1196 dev->priv.lag = ldev; 1197 } 1198 1199 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, 1200 struct mlx5_core_dev *dev) 1201 { 1202 int i; 1203 1204 for (i = 0; i < ldev->ports; i++) 1205 if (ldev->pf[i].dev == dev) 1206 break; 1207 1208 if (i == ldev->ports) 1209 return; 1210 1211 ldev->pf[i].dev = NULL; 1212 dev->priv.lag = NULL; 1213 } 1214 1215 /* Must be called with intf_mutex held */ 1216 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) 1217 { 1218 struct mlx5_lag *ldev = NULL; 1219 struct mlx5_core_dev *tmp_dev; 1220 1221 tmp_dev = mlx5_get_next_phys_dev_lag(dev); 1222 if (tmp_dev) 1223 ldev = mlx5_lag_dev(tmp_dev); 1224 1225 if (!ldev) { 1226 ldev = mlx5_lag_dev_alloc(dev); 1227 if (!ldev) { 1228 mlx5_core_err(dev, "Failed to alloc lag dev\n"); 1229 return 0; 1230 } 1231 mlx5_ldev_add_mdev(ldev, dev); 1232 return 0; 1233 } 1234 1235 mutex_lock(&ldev->lock); 1236 if (ldev->mode_changes_in_progress) { 1237 mutex_unlock(&ldev->lock); 1238 return -EAGAIN; 1239 } 1240 mlx5_ldev_get(ldev); 1241 mlx5_ldev_add_mdev(ldev, dev); 1242 mutex_unlock(&ldev->lock); 1243 1244 return 0; 1245 } 1246 1247 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) 1248 { 1249 struct mlx5_lag *ldev; 1250 1251 ldev = mlx5_lag_dev(dev); 1252 if (!ldev) 1253 return; 1254 1255 /* mdev is being removed, might as well remove debugfs 1256 * as early as possible. 1257 */ 1258 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs); 1259 recheck: 1260 mutex_lock(&ldev->lock); 1261 if (ldev->mode_changes_in_progress) { 1262 mutex_unlock(&ldev->lock); 1263 msleep(100); 1264 goto recheck; 1265 } 1266 mlx5_ldev_remove_mdev(ldev, dev); 1267 mutex_unlock(&ldev->lock); 1268 mlx5_ldev_put(ldev); 1269 } 1270 1271 bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) 1272 { 1273 if (!MLX5_CAP_GEN(dev, vport_group_manager) || 1274 !MLX5_CAP_GEN(dev, lag_master) || 1275 MLX5_CAP_GEN(dev, num_lag_ports) < 2 || 1276 MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) 1277 return false; 1278 return true; 1279 } 1280 1281 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) 1282 { 1283 int err; 1284 1285 if (!mlx5_lag_is_supported(dev)) 1286 return; 1287 1288 recheck: 1289 mlx5_dev_list_lock(); 1290 err = __mlx5_lag_dev_add_mdev(dev); 1291 mlx5_dev_list_unlock(); 1292 1293 if (err) { 1294 msleep(100); 1295 goto recheck; 1296 } 1297 mlx5_ldev_add_debugfs(dev); 1298 } 1299 1300 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, 1301 struct net_device *netdev) 1302 { 1303 struct mlx5_lag *ldev; 1304 bool lag_is_active; 1305 1306 ldev = mlx5_lag_dev(dev); 1307 if (!ldev) 1308 return; 1309 1310 mutex_lock(&ldev->lock); 1311 mlx5_ldev_remove_netdev(ldev, netdev); 1312 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); 1313 1314 lag_is_active = __mlx5_lag_is_active(ldev); 1315 mutex_unlock(&ldev->lock); 1316 1317 if (lag_is_active) 1318 mlx5_queue_bond_work(ldev, 0); 1319 } 1320 1321 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, 1322 struct net_device *netdev) 1323 { 1324 struct mlx5_lag *ldev; 1325 int i; 1326 1327 ldev = mlx5_lag_dev(dev); 1328 if (!ldev) 1329 return; 1330 1331 mutex_lock(&ldev->lock); 1332 mlx5_ldev_add_netdev(ldev, dev, netdev); 1333 1334 for (i = 0; i < ldev->ports; i++) 1335 if (!ldev->pf[i].netdev) 1336 break; 1337 1338 if (i >= ldev->ports) 1339 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); 1340 mutex_unlock(&ldev->lock); 1341 mlx5_queue_bond_work(ldev, 0); 1342 } 1343 1344 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) 1345 { 1346 struct mlx5_lag *ldev; 1347 unsigned long flags; 1348 bool res; 1349 1350 spin_lock_irqsave(&lag_lock, flags); 1351 ldev = mlx5_lag_dev(dev); 1352 res = ldev && __mlx5_lag_is_roce(ldev); 1353 spin_unlock_irqrestore(&lag_lock, flags); 1354 1355 return res; 1356 } 1357 EXPORT_SYMBOL(mlx5_lag_is_roce); 1358 1359 bool mlx5_lag_is_active(struct mlx5_core_dev *dev) 1360 { 1361 struct mlx5_lag *ldev; 1362 unsigned long flags; 1363 bool res; 1364 1365 spin_lock_irqsave(&lag_lock, flags); 1366 ldev = mlx5_lag_dev(dev); 1367 res = ldev && __mlx5_lag_is_active(ldev); 1368 spin_unlock_irqrestore(&lag_lock, flags); 1369 1370 return res; 1371 } 1372 EXPORT_SYMBOL(mlx5_lag_is_active); 1373 1374 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev) 1375 { 1376 struct mlx5_lag *ldev; 1377 unsigned long flags; 1378 bool res = 0; 1379 1380 spin_lock_irqsave(&lag_lock, flags); 1381 ldev = mlx5_lag_dev(dev); 1382 if (ldev) 1383 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags); 1384 spin_unlock_irqrestore(&lag_lock, flags); 1385 1386 return res; 1387 } 1388 EXPORT_SYMBOL(mlx5_lag_mode_is_hash); 1389 1390 bool mlx5_lag_is_master(struct mlx5_core_dev *dev) 1391 { 1392 struct mlx5_lag *ldev; 1393 unsigned long flags; 1394 bool res; 1395 1396 spin_lock_irqsave(&lag_lock, flags); 1397 ldev = mlx5_lag_dev(dev); 1398 res = ldev && __mlx5_lag_is_active(ldev) && 1399 dev == ldev->pf[MLX5_LAG_P1].dev; 1400 spin_unlock_irqrestore(&lag_lock, flags); 1401 1402 return res; 1403 } 1404 EXPORT_SYMBOL(mlx5_lag_is_master); 1405 1406 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) 1407 { 1408 struct mlx5_lag *ldev; 1409 unsigned long flags; 1410 bool res; 1411 1412 spin_lock_irqsave(&lag_lock, flags); 1413 ldev = mlx5_lag_dev(dev); 1414 res = ldev && __mlx5_lag_is_sriov(ldev); 1415 spin_unlock_irqrestore(&lag_lock, flags); 1416 1417 return res; 1418 } 1419 EXPORT_SYMBOL(mlx5_lag_is_sriov); 1420 1421 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) 1422 { 1423 struct mlx5_lag *ldev; 1424 unsigned long flags; 1425 bool res; 1426 1427 spin_lock_irqsave(&lag_lock, flags); 1428 ldev = mlx5_lag_dev(dev); 1429 res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); 1430 spin_unlock_irqrestore(&lag_lock, flags); 1431 1432 return res; 1433 } 1434 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); 1435 1436 void mlx5_lag_disable_change(struct mlx5_core_dev *dev) 1437 { 1438 struct mlx5_lag *ldev; 1439 1440 ldev = mlx5_lag_dev(dev); 1441 if (!ldev) 1442 return; 1443 1444 mlx5_dev_list_lock(); 1445 mutex_lock(&ldev->lock); 1446 1447 ldev->mode_changes_in_progress++; 1448 if (__mlx5_lag_is_active(ldev)) 1449 mlx5_disable_lag(ldev); 1450 1451 mutex_unlock(&ldev->lock); 1452 mlx5_dev_list_unlock(); 1453 } 1454 1455 void mlx5_lag_enable_change(struct mlx5_core_dev *dev) 1456 { 1457 struct mlx5_lag *ldev; 1458 1459 ldev = mlx5_lag_dev(dev); 1460 if (!ldev) 1461 return; 1462 1463 mutex_lock(&ldev->lock); 1464 ldev->mode_changes_in_progress--; 1465 mutex_unlock(&ldev->lock); 1466 mlx5_queue_bond_work(ldev, 0); 1467 } 1468 1469 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) 1470 { 1471 struct net_device *ndev = NULL; 1472 struct mlx5_lag *ldev; 1473 unsigned long flags; 1474 int i; 1475 1476 spin_lock_irqsave(&lag_lock, flags); 1477 ldev = mlx5_lag_dev(dev); 1478 1479 if (!(ldev && __mlx5_lag_is_roce(ldev))) 1480 goto unlock; 1481 1482 if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 1483 for (i = 0; i < ldev->ports; i++) 1484 if (ldev->tracker.netdev_state[i].tx_enabled) 1485 ndev = ldev->pf[i].netdev; 1486 if (!ndev) 1487 ndev = ldev->pf[ldev->ports - 1].netdev; 1488 } else { 1489 ndev = ldev->pf[MLX5_LAG_P1].netdev; 1490 } 1491 if (ndev) 1492 dev_hold(ndev); 1493 1494 unlock: 1495 spin_unlock_irqrestore(&lag_lock, flags); 1496 1497 return ndev; 1498 } 1499 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); 1500 1501 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, 1502 struct net_device *slave) 1503 { 1504 struct mlx5_lag *ldev; 1505 unsigned long flags; 1506 u8 port = 0; 1507 int i; 1508 1509 spin_lock_irqsave(&lag_lock, flags); 1510 ldev = mlx5_lag_dev(dev); 1511 if (!(ldev && __mlx5_lag_is_roce(ldev))) 1512 goto unlock; 1513 1514 for (i = 0; i < ldev->ports; i++) { 1515 if (ldev->pf[MLX5_LAG_P1].netdev == slave) { 1516 port = i; 1517 break; 1518 } 1519 } 1520 1521 port = ldev->v2p_map[port * ldev->buckets]; 1522 1523 unlock: 1524 spin_unlock_irqrestore(&lag_lock, flags); 1525 return port; 1526 } 1527 EXPORT_SYMBOL(mlx5_lag_get_slave_port); 1528 1529 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) 1530 { 1531 struct mlx5_lag *ldev; 1532 1533 ldev = mlx5_lag_dev(dev); 1534 if (!ldev) 1535 return 0; 1536 1537 return ldev->ports; 1538 } 1539 EXPORT_SYMBOL(mlx5_lag_get_num_ports); 1540 1541 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i) 1542 { 1543 struct mlx5_core_dev *peer_dev = NULL; 1544 struct mlx5_lag *ldev; 1545 unsigned long flags; 1546 int idx; 1547 1548 spin_lock_irqsave(&lag_lock, flags); 1549 ldev = mlx5_lag_dev(dev); 1550 if (!ldev) 1551 goto unlock; 1552 1553 if (*i == ldev->ports) 1554 goto unlock; 1555 for (idx = *i; idx < ldev->ports; idx++) 1556 if (ldev->pf[idx].dev != dev) 1557 break; 1558 1559 if (idx == ldev->ports) { 1560 *i = idx; 1561 goto unlock; 1562 } 1563 *i = idx + 1; 1564 1565 peer_dev = ldev->pf[idx].dev; 1566 1567 unlock: 1568 spin_unlock_irqrestore(&lag_lock, flags); 1569 return peer_dev; 1570 } 1571 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev); 1572 1573 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, 1574 u64 *values, 1575 int num_counters, 1576 size_t *offsets) 1577 { 1578 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); 1579 struct mlx5_core_dev **mdev; 1580 struct mlx5_lag *ldev; 1581 unsigned long flags; 1582 int num_ports; 1583 int ret, i, j; 1584 void *out; 1585 1586 out = kvzalloc(outlen, GFP_KERNEL); 1587 if (!out) 1588 return -ENOMEM; 1589 1590 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL); 1591 if (!mdev) { 1592 ret = -ENOMEM; 1593 goto free_out; 1594 } 1595 1596 memset(values, 0, sizeof(*values) * num_counters); 1597 1598 spin_lock_irqsave(&lag_lock, flags); 1599 ldev = mlx5_lag_dev(dev); 1600 if (ldev && __mlx5_lag_is_active(ldev)) { 1601 num_ports = ldev->ports; 1602 for (i = 0; i < ldev->ports; i++) 1603 mdev[i] = ldev->pf[i].dev; 1604 } else { 1605 num_ports = 1; 1606 mdev[MLX5_LAG_P1] = dev; 1607 } 1608 spin_unlock_irqrestore(&lag_lock, flags); 1609 1610 for (i = 0; i < num_ports; ++i) { 1611 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; 1612 1613 MLX5_SET(query_cong_statistics_in, in, opcode, 1614 MLX5_CMD_OP_QUERY_CONG_STATISTICS); 1615 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, 1616 out); 1617 if (ret) 1618 goto free_mdev; 1619 1620 for (j = 0; j < num_counters; ++j) 1621 values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); 1622 } 1623 1624 free_mdev: 1625 kvfree(mdev); 1626 free_out: 1627 kvfree(out); 1628 return ret; 1629 } 1630 EXPORT_SYMBOL(mlx5_lag_query_cong_counters); 1631