/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
#include "mpesw.h"

enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

	if (mode == MLX5_LAG_MODE_MPESW)
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

static u8 lag_active_port_bits(struct mlx5_lag *ldev)
{
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	u8 active_port = 0;
	int num_enabled;
	int idx;

	mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
			      &num_enabled);
	for (idx = 0; idx < num_enabled; idx++)
		active_port |= BIT_MASK(enabled_ports[idx]);

	return active_port;
}

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
				     &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
		break;
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
			break;

		MLX5_SET(lagc, lag_ctx, active_port,
			 lag_active_port_bits(mlx5_lag_dev(dev)));
		break;
	default:
		break;
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			ports[(*num_disabled)++] = i;
	}
}

void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}

static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}

/* Create a mapping between steering slots and active ports.
 * As we have ldev->buckets slots per port first assume the native
 * mapping should be used.
 * If there are ports that are disabled fill the relevant slots
 * with mapping that points to active ports.
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* Use native mapping by default where each port's buckets
	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}

static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].has_drop)
			return true;
	return false;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}

static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x2);

	MLX5_SET(lagc, lag_ctx, active_port, ports);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 active_ports;
	int ret;

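	/* Hash-based LAG reprograms the port selection flow table (and, when
	 * the bypass capability is set, the active port bitmap); queue
	 * affinity LAG is updated with a MODIFY_LAG command instead.
	 */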
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
		ret = mlx5_lag_port_sel_modify(ldev, ports);
		if (ret ||
		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
			return ret;

		active_ports = lag_active_port_bits(ldev);

		return mlx5_cmd_modify_active_port(dev0, active_ports);
	}
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
		if (ldev->ports > 2)
			return -EINVAL;
		return 0;
	}

	if (ldev->ports > 2)
		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;

	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);

	return 0;
}

static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
}

static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb) {
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
	}

	if (mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}

char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	int port_sel_mode = get_port_sel_mode(mode, flags);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
	default: return "invalid";
	}
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	unsigned long flags = 0;
	int err;

	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;
}

int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	unsigned long flags = ldev->mode_flags;
	int err;

	ldev->mode = MLX5_LAG_MODE_NONE;
	ldev->mode_flags = 0;
	mlx5_lag_mp_reset(ldev);

	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
		mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
							 dev1->priv.eswitch);
		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		mlx5_lag_port_sel_destroy(ldev);
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}

#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	struct mlx5_core_dev *dev;
	u8 mode;
#endif
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			return false;

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++) {
		dev = ldev->pf[i].dev;
		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
			return false;
	}

	dev = ldev->pf[MLX5_LAG_P1].dev;
	mode = mlx5_eswitch_mode(dev);
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
			return false;

	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
		return false;
#else
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	return true;
}

void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
	bool roce_lag = true;
	int i;

	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
#endif

	return roce_lag;
}

static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

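		/* RoCE and shared FDB LAG need the per-port IB devices to be
		 * unloaded before the LAG is created; a single IB device is
		 * exposed on top of the bond afterwards.
		 */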
		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
					MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++)
				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		    bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);
	unsigned long flags;

	if (fn >= ldev->ports)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = mlx5_lag_dev(tmp_dev);

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		mlx5_ldev_add_mdev(ldev, dev);
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
	     MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_dev_list_unlock();

	if (err) {
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);

	lag_is_active = __mlx5_lag_is_active(ldev);
	mutex_unlock(&ldev->lock);

	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].netdev)
			break;

	if (i >= ldev->ports)
		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev)
		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_mode_is_hash);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);

	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		for (i = 0; i < ldev->ports; i++)
			if (ldev->tracker.netdev_state[i].tx_enabled)
				ndev = ldev->pf[i].netdev;
		if (!ndev)
			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	u8 port = 0;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == slave) {
			port = i;
			break;
		}
	}

	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return 0;

	return ldev->ports;
}
EXPORT_SYMBOL(mlx5_lag_get_num_ports);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
		   ldev->pf[MLX5_LAG_P2].dev :
		   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		for (i = 0; i < ldev->ports; i++)
			mdev[i] = ldev->pf[i].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);