1 /* 2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/netdevice.h> 34 #include <net/bonding.h> 35 #include <linux/mlx5/driver.h> 36 #include <linux/mlx5/eswitch.h> 37 #include <linux/mlx5/vport.h> 38 #include "lib/devcom.h" 39 #include "mlx5_core.h" 40 #include "eswitch.h" 41 #include "esw/acl/ofld.h" 42 #include "lag.h" 43 #include "mp.h" 44 #include "mpesw.h" 45 46 enum { 47 MLX5_LAG_EGRESS_PORT_1 = 1, 48 MLX5_LAG_EGRESS_PORT_2, 49 }; 50 51 /* General purpose, use for short periods of time. 52 * Beware of lock dependencies (preferably, no locks should be acquired 53 * under it). 
 */
static DEFINE_SPINLOCK(lag_lock);

/* Translate (LAG mode, mode flags) into the firmware port-selection mode
 * carried in the CREATE_LAG context.
 */
static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

	if (mode == MLX5_LAG_MODE_MPESW)
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

/* Build a bitmask with one bit set per tx-active port, as inferred from
 * the current bond tracker state.
 */
static u8 lag_active_port_bits(struct mlx5_lag *ldev)
{
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	u8 active_port = 0;
	int num_enabled;
	int idx;

	mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
			      &num_enabled);
	for (idx = 0; idx < num_enabled; idx++)
		active_port |= BIT_MASK(enabled_ports[idx]);

	return active_port;
}

/* Issue the CREATE_LAG firmware command.
 * @ports: virtual-to-physical port map; only the first two entries are
 *         programmed, and only in queue-affinity mode.
 * In port-selection-FT (hash) mode the active_port bitmask is programmed
 * only when the port_select_flow_table_bypass capability is present.
 */
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
				     &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
		break;
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
			break;

		MLX5_SET(lagc, lag_ctx, active_port,
			 lag_active_port_bits(mlx5_lag_dev(dev)));
		break;
	default:
		break;
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

/* MODIFY_LAG with field_select 0x1: update only the tx remap affinity of
 * the first two ports.
 */
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)]
		= {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

/* Collect the indices of ports that are tx-disabled or link-down into
 * @ports; @num_disabled receives the count.
 */
static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			ports[(*num_disabled)++] = i;
	}
}

/* Collect the indices of ports that are tx-enabled and link-up.
 * If none qualify, fall back to the disabled set so the caller always
 * gets a non-empty list (assuming num_ports > 0).
 */
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}

/* Log the current mapping: the list of active ports in hash mode,
 * otherwise the full v2p (bucket -> egress port) map.
 */
static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8
enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		/* Strip the trailing ", " separator.
		 * NOTE(review): assumes at least one port was printed
		 * (num_enabled > 0, i.e. ldev->ports > 0) — verify; otherwise
		 * this writes before buf[].
		 */
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

/* kref release callback: unregister the netdev notifier, flush and
 * destroy the bond workqueue, tear down the mp/mpesw sub-modules, then
 * free the lag object.
 */
static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	mlx5_lag_mpesw_cleanup(ldev);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

/* Allocate and initialize a lag object for @dev's physical-device group.
 * Netdev-notifier and multipath init failures are logged but deliberately
 * non-fatal. Returns NULL only on allocation failure.
 */
static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		/* NULL notifier_call tells mlx5_ldev_free() to skip
		 * unregistering.
		 */
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	mlx5_lag_mpesw_init(ldev);
	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}

/* Return the lag port index owning @ndev, or -ENOENT if it is not one
 * of ours.
 */
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}

/* Create a mapping between steering slots and active ports.
 * As we have ldev->buckets slots per port first assume the native
 * mapping should be used.
 * If there are ports that are disabled fill the relevant slots
 * with mapping that points to active ports.
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	/* Partition ports into enabled (tx_enabled && link_up) and
	 * disabled sets.
	 */
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* Use native mapping by default where each port's buckets
	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			/* +1: ports[] holds 1-based egress port numbers */
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}

/* True if any lag member currently has an uplink ingress drop rule. */
static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].has_drop)
			return true;
	return false;
}

/* Remove every uplink ingress drop rule installed on the lag members. */
static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

/* Re-install uplink ingress drop rules on the currently inactive ports:
 * existing rules are removed first, then one drop rule is created per
 * tx-disabled port (only if the tracker reports an inactive member).
 */
static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}

/* MODIFY_LAG with field_select 0x2: update only the active-port bitmask. */
static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x2);

	MLX5_SET(lagc, lag_ctx, active_port, ports);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

/* Apply a new port map: via the port-selection flow table in hash mode
 * (additionally refreshing the active-port bitmask when the bypass cap
 * is set), otherwise via MODIFY_LAG affinity fields.
 */
static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 active_ports;
	int ret;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
		ret = mlx5_lag_port_sel_modify(ldev, ports);
		if (ret ||
		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
			return ret;

		active_ports = lag_active_port_bits(ldev);

		return mlx5_cmd_modify_active_port(dev0, active_ports);
	}
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

/* Recompute the tx affinity map from @tracker and, if it differs from
 * the cached v2p map, push the whole map to firmware and refresh
 * active-backup drop rules.
 */
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	/* The full map is applied in one _mlx5_modify_lag() call on the
	 * first slot that differs; after the memcpy the remaining slots
	 * compare equal, so firmware is not modified twice.
	 */
	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

/* Pick the RoCE port-selection mode: hash (port-selection FT) when the
 * capability exists, else queue affinity. More than two ports require
 * the FT capability; also widens buckets to the max for >2 ports.
 */
static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
		if (ldev->ports > 2)
			return -EINVAL;
		return 0;
	}

	if (ldev->ports > 2)
		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;

	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);

	return 0;
}

/* For switchdev (offloads) modes: select hash-based steering when the
 * device supports the port-selection FT and the bond hashes tx traffic.
 * MPESW never uses hash-based steering.
 */
static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
}

/* Compute the mode flags used for activation (shared FDB, native FDB
 * selection, hash-based steering). Returns -EINVAL via the roce helper
 * when >2 ports lack the required capability.
 */
static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb) {
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
			flags);
	}

	if (mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}

/* Human-readable name of the firmware port-selection mode, for logging. */
char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	int port_sel_mode = get_port_sel_mode(mode, flags);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
	default: return "invalid";
	}
}

/* Create the firmware LAG object on dev0 and, when requested, switch
 * both eswitches to single (shared) FDB mode. On single-FDB failure the
 * just-created LAG is destroyed again.
 * @tracker may be NULL (no mapping is printed then).
 */
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	/* Roll back the firmware LAG if single-FDB setup failed. */
	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

/* Activate LAG in @mode: compute mode flags, build the tx affinity map
 * (and the port-selection flow table for hash mode), create the
 * firmware LAG, then install active-backup drop rules when relevant.
 * On success ldev->mode/mode_flags are committed; on failure the port
 * selection tables are torn down and the error is returned.
 */
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	unsigned long flags = 0;
	int err;

	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	/* MPESW does not use a tx affinity map. */
	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;
}

/* Tear down the active LAG: reset mode state, undo shared FDB, destroy
 * the firmware LAG object, then remove port-selection tables and drop
 * rules. Mode state is cleared before issuing the command.
 */
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	unsigned long flags = ldev->mode_flags;
	int err;

	ldev->mode = MLX5_LAG_MODE_NONE;
	ldev->mode_flags = 0;
	mlx5_lag_mp_reset(ldev);

	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
		mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
							 dev1->priv.eswitch);
		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		mlx5_lag_port_sel_destroy(ldev);
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}

#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
/* Check whether LAG may be activated: all member devices present, no
 * VFs outside switchdev mode, all eswitches in the same mode, and (for
 * offloads mode) exactly the supported port count. Without eswitch
 * support, SR-IOV must be disabled on every member.
 */
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	struct mlx5_core_dev *dev;
	u8 mode;
#endif
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			return false;

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++) {
		dev = ldev->pf[i].dev;
		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
			return false;
	}

	dev = ldev->pf[MLX5_LAG_P1].dev;
	mode = mlx5_eswitch_mode(dev);
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
			return false;

	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
		return false;
#else
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	return true;
}

/* Re-enable the IB auxiliary devices of all lag members and rescan. */
static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

/* Hide the IB auxiliary devices of all lag members and rescan, so the
 * IB layer is detached while the lag topology changes.
 */
static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

/* Disable the active LAG: detach IB devices (shared FDB) or disable
 * RoCE on secondary ports (RoCE lag), deactivate the firmware LAG,
 * then re-attach IB devices and reload eswitch reps as needed.
 */
void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

/* True when both devices are in switchdev mode with vport match
 * metadata, the devcom pair is established, and the firmware exposes
 * the capabilities needed for a single shared FDB.
 */
bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

/* RoCE lag requires every member to have SR-IOV disabled and (with
 * eswitch support) to be in legacy eswitch mode.
 */
static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
	bool roce_lag = true;
	int i;

	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
#endif

	return roce_lag;
}

static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

/* Reconcile the LAG state with the bond tracker: activate, modify, or
 * disable the LAG as needed. Called from the bond work with the device
 * list lock and ldev->lock held (see mlx5_do_bond_work()).
 */
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ?
					MLX5_LAG_MODE_ROCE :
					MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			/* Re-attach IB on dev0 and enable RoCE on the
			 * secondary ports now that the LAG is up.
			 */
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++)
				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				/* Roll back: detach IB again, deactivate the
				 * LAG, restore devices and reps.
				 */
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

/* Delayed-work entry point: takes the device list lock (retrying in one
 * second if unavailable or if a mode change is in progress) and runs
 * mlx5_do_bond() under ldev->lock.
 */
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

/* Handle NETDEV_CHANGEUPPER: update @tracker's bonding state from the
 * bond master's slave list. Returns 1 when tracker->is_bonded changed
 * (so the caller should schedule bond work), else 0.
 */
static int
mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
			      struct lag_tracker *tracker,
			      struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		    bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash.
	 */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

/* Handle NETDEV_CHANGELOWERSTATE for one of our lag ports: record the
 * new lower state in @tracker. Returns 1 if the tracker was updated.
 */
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

/* Handle NETDEV_CHANGEINFODATA on a bond master: recompute whether any
 * of our slaves is inactive. Returns 1 only when has_inactive changed.
 */
static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	/* Work on a local copy; commit back only after the handlers ran. */
	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

1118 return NOTIFY_DONE; 1119 } 1120 1121 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, 1122 struct mlx5_core_dev *dev, 1123 struct net_device *netdev) 1124 { 1125 unsigned int fn = mlx5_get_dev_index(dev); 1126 unsigned long flags; 1127 1128 if (fn >= ldev->ports) 1129 return; 1130 1131 spin_lock_irqsave(&lag_lock, flags); 1132 ldev->pf[fn].netdev = netdev; 1133 ldev->tracker.netdev_state[fn].link_up = 0; 1134 ldev->tracker.netdev_state[fn].tx_enabled = 0; 1135 spin_unlock_irqrestore(&lag_lock, flags); 1136 } 1137 1138 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, 1139 struct net_device *netdev) 1140 { 1141 unsigned long flags; 1142 int i; 1143 1144 spin_lock_irqsave(&lag_lock, flags); 1145 for (i = 0; i < ldev->ports; i++) { 1146 if (ldev->pf[i].netdev == netdev) { 1147 ldev->pf[i].netdev = NULL; 1148 break; 1149 } 1150 } 1151 spin_unlock_irqrestore(&lag_lock, flags); 1152 } 1153 1154 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, 1155 struct mlx5_core_dev *dev) 1156 { 1157 unsigned int fn = mlx5_get_dev_index(dev); 1158 1159 if (fn >= ldev->ports) 1160 return; 1161 1162 ldev->pf[fn].dev = dev; 1163 dev->priv.lag = ldev; 1164 } 1165 1166 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, 1167 struct mlx5_core_dev *dev) 1168 { 1169 int i; 1170 1171 for (i = 0; i < ldev->ports; i++) 1172 if (ldev->pf[i].dev == dev) 1173 break; 1174 1175 if (i == ldev->ports) 1176 return; 1177 1178 ldev->pf[i].dev = NULL; 1179 dev->priv.lag = NULL; 1180 } 1181 1182 /* Must be called with intf_mutex held */ 1183 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) 1184 { 1185 struct mlx5_lag *ldev = NULL; 1186 struct mlx5_core_dev *tmp_dev; 1187 1188 tmp_dev = mlx5_get_next_phys_dev_lag(dev); 1189 if (tmp_dev) 1190 ldev = tmp_dev->priv.lag; 1191 1192 if (!ldev) { 1193 ldev = mlx5_lag_dev_alloc(dev); 1194 if (!ldev) { 1195 mlx5_core_err(dev, "Failed to alloc lag dev\n"); 1196 return 0; 1197 } 1198 mlx5_ldev_add_mdev(ldev, dev); 1199 return 
0; 1200 } 1201 1202 mutex_lock(&ldev->lock); 1203 if (ldev->mode_changes_in_progress) { 1204 mutex_unlock(&ldev->lock); 1205 return -EAGAIN; 1206 } 1207 mlx5_ldev_get(ldev); 1208 mlx5_ldev_add_mdev(ldev, dev); 1209 mutex_unlock(&ldev->lock); 1210 1211 return 0; 1212 } 1213 1214 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) 1215 { 1216 struct mlx5_lag *ldev; 1217 1218 ldev = mlx5_lag_dev(dev); 1219 if (!ldev) 1220 return; 1221 1222 /* mdev is being removed, might as well remove debugfs 1223 * as early as possible. 1224 */ 1225 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs); 1226 recheck: 1227 mutex_lock(&ldev->lock); 1228 if (ldev->mode_changes_in_progress) { 1229 mutex_unlock(&ldev->lock); 1230 msleep(100); 1231 goto recheck; 1232 } 1233 mlx5_ldev_remove_mdev(ldev, dev); 1234 mutex_unlock(&ldev->lock); 1235 mlx5_ldev_put(ldev); 1236 } 1237 1238 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) 1239 { 1240 int err; 1241 1242 if (!MLX5_CAP_GEN(dev, vport_group_manager) || 1243 !MLX5_CAP_GEN(dev, lag_master) || 1244 (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS || 1245 MLX5_CAP_GEN(dev, num_lag_ports) <= 1)) 1246 return; 1247 1248 recheck: 1249 mlx5_dev_list_lock(); 1250 err = __mlx5_lag_dev_add_mdev(dev); 1251 mlx5_dev_list_unlock(); 1252 1253 if (err) { 1254 msleep(100); 1255 goto recheck; 1256 } 1257 mlx5_ldev_add_debugfs(dev); 1258 } 1259 1260 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, 1261 struct net_device *netdev) 1262 { 1263 struct mlx5_lag *ldev; 1264 bool lag_is_active; 1265 1266 ldev = mlx5_lag_dev(dev); 1267 if (!ldev) 1268 return; 1269 1270 mutex_lock(&ldev->lock); 1271 mlx5_ldev_remove_netdev(ldev, netdev); 1272 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); 1273 1274 lag_is_active = __mlx5_lag_is_active(ldev); 1275 mutex_unlock(&ldev->lock); 1276 1277 if (lag_is_active) 1278 mlx5_queue_bond_work(ldev, 0); 1279 } 1280 1281 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, 1282 struct net_device *netdev) 1283 { 
1284 struct mlx5_lag *ldev; 1285 int i; 1286 1287 ldev = mlx5_lag_dev(dev); 1288 if (!ldev) 1289 return; 1290 1291 mutex_lock(&ldev->lock); 1292 mlx5_ldev_add_netdev(ldev, dev, netdev); 1293 1294 for (i = 0; i < ldev->ports; i++) 1295 if (!ldev->pf[i].netdev) 1296 break; 1297 1298 if (i >= ldev->ports) 1299 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); 1300 mutex_unlock(&ldev->lock); 1301 mlx5_queue_bond_work(ldev, 0); 1302 } 1303 1304 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) 1305 { 1306 struct mlx5_lag *ldev; 1307 unsigned long flags; 1308 bool res; 1309 1310 spin_lock_irqsave(&lag_lock, flags); 1311 ldev = mlx5_lag_dev(dev); 1312 res = ldev && __mlx5_lag_is_roce(ldev); 1313 spin_unlock_irqrestore(&lag_lock, flags); 1314 1315 return res; 1316 } 1317 EXPORT_SYMBOL(mlx5_lag_is_roce); 1318 1319 bool mlx5_lag_is_active(struct mlx5_core_dev *dev) 1320 { 1321 struct mlx5_lag *ldev; 1322 unsigned long flags; 1323 bool res; 1324 1325 spin_lock_irqsave(&lag_lock, flags); 1326 ldev = mlx5_lag_dev(dev); 1327 res = ldev && __mlx5_lag_is_active(ldev); 1328 spin_unlock_irqrestore(&lag_lock, flags); 1329 1330 return res; 1331 } 1332 EXPORT_SYMBOL(mlx5_lag_is_active); 1333 1334 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev) 1335 { 1336 struct mlx5_lag *ldev; 1337 unsigned long flags; 1338 bool res = 0; 1339 1340 spin_lock_irqsave(&lag_lock, flags); 1341 ldev = mlx5_lag_dev(dev); 1342 if (ldev) 1343 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags); 1344 spin_unlock_irqrestore(&lag_lock, flags); 1345 1346 return res; 1347 } 1348 EXPORT_SYMBOL(mlx5_lag_mode_is_hash); 1349 1350 bool mlx5_lag_is_master(struct mlx5_core_dev *dev) 1351 { 1352 struct mlx5_lag *ldev; 1353 unsigned long flags; 1354 bool res; 1355 1356 spin_lock_irqsave(&lag_lock, flags); 1357 ldev = mlx5_lag_dev(dev); 1358 res = ldev && __mlx5_lag_is_active(ldev) && 1359 dev == ldev->pf[MLX5_LAG_P1].dev; 1360 spin_unlock_irqrestore(&lag_lock, flags); 1361 1362 return res; 1363 } 
1364 EXPORT_SYMBOL(mlx5_lag_is_master); 1365 1366 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) 1367 { 1368 struct mlx5_lag *ldev; 1369 unsigned long flags; 1370 bool res; 1371 1372 spin_lock_irqsave(&lag_lock, flags); 1373 ldev = mlx5_lag_dev(dev); 1374 res = ldev && __mlx5_lag_is_sriov(ldev); 1375 spin_unlock_irqrestore(&lag_lock, flags); 1376 1377 return res; 1378 } 1379 EXPORT_SYMBOL(mlx5_lag_is_sriov); 1380 1381 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) 1382 { 1383 struct mlx5_lag *ldev; 1384 unsigned long flags; 1385 bool res; 1386 1387 spin_lock_irqsave(&lag_lock, flags); 1388 ldev = mlx5_lag_dev(dev); 1389 res = ldev && __mlx5_lag_is_sriov(ldev) && 1390 test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); 1391 spin_unlock_irqrestore(&lag_lock, flags); 1392 1393 return res; 1394 } 1395 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); 1396 1397 void mlx5_lag_disable_change(struct mlx5_core_dev *dev) 1398 { 1399 struct mlx5_lag *ldev; 1400 1401 ldev = mlx5_lag_dev(dev); 1402 if (!ldev) 1403 return; 1404 1405 mlx5_dev_list_lock(); 1406 mutex_lock(&ldev->lock); 1407 1408 ldev->mode_changes_in_progress++; 1409 if (__mlx5_lag_is_active(ldev)) 1410 mlx5_disable_lag(ldev); 1411 1412 mutex_unlock(&ldev->lock); 1413 mlx5_dev_list_unlock(); 1414 } 1415 1416 void mlx5_lag_enable_change(struct mlx5_core_dev *dev) 1417 { 1418 struct mlx5_lag *ldev; 1419 1420 ldev = mlx5_lag_dev(dev); 1421 if (!ldev) 1422 return; 1423 1424 mutex_lock(&ldev->lock); 1425 ldev->mode_changes_in_progress--; 1426 mutex_unlock(&ldev->lock); 1427 mlx5_queue_bond_work(ldev, 0); 1428 } 1429 1430 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) 1431 { 1432 struct net_device *ndev = NULL; 1433 struct mlx5_lag *ldev; 1434 unsigned long flags; 1435 int i; 1436 1437 spin_lock_irqsave(&lag_lock, flags); 1438 ldev = mlx5_lag_dev(dev); 1439 1440 if (!(ldev && __mlx5_lag_is_roce(ldev))) 1441 goto unlock; 1442 1443 if (ldev->tracker.tx_type == 
NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 1444 for (i = 0; i < ldev->ports; i++) 1445 if (ldev->tracker.netdev_state[i].tx_enabled) 1446 ndev = ldev->pf[i].netdev; 1447 if (!ndev) 1448 ndev = ldev->pf[ldev->ports - 1].netdev; 1449 } else { 1450 ndev = ldev->pf[MLX5_LAG_P1].netdev; 1451 } 1452 if (ndev) 1453 dev_hold(ndev); 1454 1455 unlock: 1456 spin_unlock_irqrestore(&lag_lock, flags); 1457 1458 return ndev; 1459 } 1460 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); 1461 1462 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, 1463 struct net_device *slave) 1464 { 1465 struct mlx5_lag *ldev; 1466 unsigned long flags; 1467 u8 port = 0; 1468 int i; 1469 1470 spin_lock_irqsave(&lag_lock, flags); 1471 ldev = mlx5_lag_dev(dev); 1472 if (!(ldev && __mlx5_lag_is_roce(ldev))) 1473 goto unlock; 1474 1475 for (i = 0; i < ldev->ports; i++) { 1476 if (ldev->pf[MLX5_LAG_P1].netdev == slave) { 1477 port = i; 1478 break; 1479 } 1480 } 1481 1482 port = ldev->v2p_map[port * ldev->buckets]; 1483 1484 unlock: 1485 spin_unlock_irqrestore(&lag_lock, flags); 1486 return port; 1487 } 1488 EXPORT_SYMBOL(mlx5_lag_get_slave_port); 1489 1490 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) 1491 { 1492 struct mlx5_lag *ldev; 1493 1494 ldev = mlx5_lag_dev(dev); 1495 if (!ldev) 1496 return 0; 1497 1498 return ldev->ports; 1499 } 1500 EXPORT_SYMBOL(mlx5_lag_get_num_ports); 1501 1502 struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) 1503 { 1504 struct mlx5_core_dev *peer_dev = NULL; 1505 struct mlx5_lag *ldev; 1506 unsigned long flags; 1507 1508 spin_lock_irqsave(&lag_lock, flags); 1509 ldev = mlx5_lag_dev(dev); 1510 if (!ldev) 1511 goto unlock; 1512 1513 peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? 
1514 ldev->pf[MLX5_LAG_P2].dev : 1515 ldev->pf[MLX5_LAG_P1].dev; 1516 1517 unlock: 1518 spin_unlock_irqrestore(&lag_lock, flags); 1519 return peer_dev; 1520 } 1521 EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); 1522 1523 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, 1524 u64 *values, 1525 int num_counters, 1526 size_t *offsets) 1527 { 1528 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); 1529 struct mlx5_core_dev **mdev; 1530 struct mlx5_lag *ldev; 1531 unsigned long flags; 1532 int num_ports; 1533 int ret, i, j; 1534 void *out; 1535 1536 out = kvzalloc(outlen, GFP_KERNEL); 1537 if (!out) 1538 return -ENOMEM; 1539 1540 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL); 1541 if (!mdev) { 1542 ret = -ENOMEM; 1543 goto free_out; 1544 } 1545 1546 memset(values, 0, sizeof(*values) * num_counters); 1547 1548 spin_lock_irqsave(&lag_lock, flags); 1549 ldev = mlx5_lag_dev(dev); 1550 if (ldev && __mlx5_lag_is_active(ldev)) { 1551 num_ports = ldev->ports; 1552 for (i = 0; i < ldev->ports; i++) 1553 mdev[i] = ldev->pf[i].dev; 1554 } else { 1555 num_ports = 1; 1556 mdev[MLX5_LAG_P1] = dev; 1557 } 1558 spin_unlock_irqrestore(&lag_lock, flags); 1559 1560 for (i = 0; i < num_ports; ++i) { 1561 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; 1562 1563 MLX5_SET(query_cong_statistics_in, in, opcode, 1564 MLX5_CMD_OP_QUERY_CONG_STATISTICS); 1565 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, 1566 out); 1567 if (ret) 1568 goto free_mdev; 1569 1570 for (j = 0; j < num_counters; ++j) 1571 values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); 1572 } 1573 1574 free_mdev: 1575 kvfree(mdev); 1576 free_out: 1577 kvfree(out); 1578 return ret; 1579 } 1580 EXPORT_SYMBOL(mlx5_lag_query_cong_counters); 1581