/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
#include "mpesw.h"

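/* Firmware egress ports are, as the explicit "= 1" below suggests,
 * 1-based; mlx5_infer_tx_affinity_mapping() relies on this when it
 * writes MLX5_LAG_EGRESS_PORT_1 + i into the v2p map.
 */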
enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

	if (mode == MLX5_LAG_MODE_MPESW)
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

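/* Build the bitmask for the LAG context "active_port" field: bit n
 * stands for the port with 0-based index n, so (for illustration) a
 * 2-port LAG with both ports active yields 0x3, and only the second
 * port active yields 0x2.
 */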
static u8 lag_active_port_bits(struct mlx5_lag *ldev)
{
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	u8 active_port = 0;
	int num_enabled;
	int idx;

	mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
			      &num_enabled);
	for (idx = 0; idx < num_enabled; idx++)
		active_port |= BIT_MASK(enabled_ports[idx]);

	return active_port;
}

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
				     &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
		break;
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
			break;

		MLX5_SET(lagc, lag_ctx, active_port,
			 lag_active_port_bits(mlx5_lag_dev(dev)));
		break;
	default:
		break;
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			ports[(*num_disabled)++] = i;
	}
}

void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}

static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	destroy_workqueue(ldev->wq);
	mlx5_lag_mpesw_cleanup(ldev);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	mlx5_lag_mpesw_init(ldev);
	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}

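/* A worked example for the mapping built below (illustrative values
 * only): with num_ports = 2 and buckets = 2 the native mapping is
 * ports[] = { 1, 1, 2, 2 }. If port 2 then loses link, each of its
 * buckets is remapped to a randomly picked active port, giving
 * ports[] = { 1, 1, 1, 1 }.
 */
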
/* Create a mapping between steering slots and active ports.
 * As we have ldev->buckets slots per port first assume the native
 * mapping should be used.
 * If there are ports that are disabled fill the relevant slots
 * with mapping that points to active ports.
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* Use native mapping by default where each port's buckets
	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}

static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].has_drop)
			return true;
	return false;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

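/* Install an ingress drop rule on the uplink vport of every inactive
 * port, presumably so that packets still arriving on an inactive
 * (backup) port are dropped at the ingress ACL instead of being
 * delivered twice. Called only for active-backup type bonds.
 */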
static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}

static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x2);

	MLX5_SET(lagc, lag_ctx, active_port, ports);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 active_ports;
	int ret;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
		ret = mlx5_lag_port_sel_modify(ldev, ports);
		if (ret ||
		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
			return ret;

		active_ports = lag_active_port_bits(ldev);

		return mlx5_cmd_modify_active_port(dev0, active_ports);
	}
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
		if (ldev->ports > 2)
			return -EINVAL;
		return 0;
	}

	if (ldev->ports > 2)
		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;

	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);

	return 0;
}

static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
}

static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb) {
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
	}

	if (mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}

char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	int port_sel_mode = get_port_sel_mode(mode, flags);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
	default: return "invalid";
	}
}

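/* Create the LAG object in firmware and, for shared-FDB LAG, merge the
 * two eswitches into a single FDB. If the shared-FDB setup fails, the
 * freshly created LAG is destroyed again so driver and firmware state
 * stay consistent.
 */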
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	unsigned long flags = 0;
	int err;

	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;
}

static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	unsigned long flags = ldev->mode_flags;
	int err;

	ldev->mode = MLX5_LAG_MODE_NONE;
	ldev->mode_flags = 0;
	mlx5_lag_mp_reset(ldev);

	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
		mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
							 dev1->priv.eswitch);
		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		mlx5_lag_port_sel_destroy(ldev);
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}

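/* LAG is only allowed when every port has a registered device, no port
 * has VFs while in legacy eswitch mode, all ports are in the same
 * eswitch mode, and switchdev (offloads) LAG is limited to exactly
 * MLX5_LAG_OFFLOADS_SUPPORTED_PORTS ports.
 */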
#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	struct mlx5_core_dev *dev;
	u8 mode;
#endif
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			return false;

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++) {
		dev = ldev->pf[i].dev;
		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
			return false;
	}

	dev = ldev->pf[MLX5_LAG_P1].dev;
	mode = mlx5_eswitch_mode(dev);
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
			return false;

	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
		return false;
#else
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	return true;
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
	bool roce_lag = true;
	int i;

	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
#endif

	return roce_lag;
}

static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

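/* Re-evaluate the bonding state and activate, modify or disable the
 * hardware LAG accordingly. Runs from the delayed work in
 * mlx5_do_bond_work(), which holds both the mlx5 device-list lock and
 * ldev->lock around the call.
 */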
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
						   MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++)
				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

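/* bond_status below is a bitmap of our netdevs enslaved to the upper
 * device, indexed by LAG port. For illustration: with two ports,
 * bond_status == 0x3 == GENMASK(1, 0) means both of our netdevs are
 * slaves of this bond.
 */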
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

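/* ldev->pf[].netdev and the tracker are updated under lag_lock so that
 * the exported query helpers further down, which read them with
 * lag_lock held and interrupts disabled, always see a consistent view.
 */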
static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);
	unsigned long flags;

	if (fn >= ldev->ports)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		mlx5_ldev_add_mdev(ldev, dev);
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}

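/* __mlx5_lag_dev_add_mdev() returns -EAGAIN while a mode change is in
 * flight (see mlx5_lag_disable_change()), so retry with a short sleep
 * until the change completes.
 */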
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
	     MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_dev_list_unlock();

	if (err) {
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);

	lag_is_active = __mlx5_lag_is_active(ldev);
	mutex_unlock(&ldev->lock);

	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].netdev)
			break;

	if (i >= ldev->ports)
		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev)
		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_mode_is_hash);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) &&
	      test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);

	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		for (i = 0; i < ldev->ports; i++)
			if (ldev->tracker.netdev_state[i].tx_enabled)
				ndev = ldev->pf[i].netdev;
		if (!ndev)
			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

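/* Map a slave netdev to the physical port currently serving its first
 * bucket in the v2p map; the returned value uses the firmware's 1-based
 * egress port numbering.
 */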
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	u8 port = 0;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == slave) {
			port = i;
			break;
		}
	}

	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return 0;

	return ldev->ports;
}
EXPORT_SYMBOL(mlx5_lag_get_num_ports);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
			   ldev->pf[MLX5_LAG_P2].dev :
			   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

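/* Query congestion statistics; when LAG is active, the counters of all
 * member PFs are queried and summed into values[], presumably because
 * traffic may have used any of the bonded ports.
 */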
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		for (i = 0; i < ldev->ports; i++)
			mdev[i] = ldev->pf[i].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);