/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"
#include "mpesw.h"

enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;

	if (mode == MLX5_LAG_MODE_MPESW)
		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;

	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
}

static u8 lag_active_port_bits(struct mlx5_lag *ldev)
{
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	u8 active_port = 0;
	int num_enabled;
	int idx;

	mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports,
			      &num_enabled);
	for (idx = 0; idx < num_enabled; idx++)
		active_port |= BIT_MASK(enabled_ports[idx]);

	return active_port;
}

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
			       unsigned long flags)
{
	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
				     &flags);
	int port_sel_mode = get_port_sel_mode(mode, flags);
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
		break;
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
			break;

		MLX5_SET(lagc, lag_ctx, active_port,
			 lag_active_port_bits(mlx5_lag_dev(dev)));
		break;
	default:
		break;
	}
	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			ports[(*num_disabled)++] = i;
	}
}

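/* Collect the ports that are both link-up and TX-enabled. If no port
 * qualifies, fall back to reporting the disabled ports so callers still
 * get a usable set to map to.
 */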
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			ports[(*num_enabled)++] = i;
	}

	if (*num_enabled == 0)
		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
}

static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
				   struct mlx5_lag *ldev,
				   struct lag_tracker *tracker,
				   unsigned long flags)
{
	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
	u8 enabled_ports[MLX5_MAX_PORTS] = {};
	int written = 0;
	int num_enabled;
	int idx;
	int err;
	int i;
	int j;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
				      &num_enabled);
		for (i = 0; i < num_enabled; i++) {
			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
			if (err != 3)
				return;
			written += err;
		}
		buf[written - 2] = 0;
		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
	} else {
		for (i = 0; i < ldev->ports; i++) {
			for (j = 0; j < ldev->buckets; j++) {
				idx = i * ldev->buckets + j;
				err = scnprintf(buf + written, 10,
						" port %d:%d", i + 1, ldev->v2p_map[idx]);
				if (err != 9)
					return;
				written += err;
			}
		}
		mlx5_core_info(dev, "lag map:%s\n", buf);
	}
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	mutex_destroy(&ldev->lock);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}
	ldev->mode = MLX5_LAG_MODE_NONE;

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
	ldev->buckets = 1;

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_ROCE;
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_SRIOV;
}

/* Create a mapping between steering slots and active ports.
 * As we have ldev->buckets slots per port, first assume the native
 * mapping should be used.
 * If there are ports that are disabled, fill the relevant slots
 * with a mapping that points to active ports.
 * e.g. with two ports, one bucket each, and port 2 down, the map
 * becomes [1 1] instead of the native [1 2].
 */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 num_ports,
					   u8 buckets,
					   u8 *ports)
{
	int disabled[MLX5_MAX_PORTS] = {};
	int enabled[MLX5_MAX_PORTS] = {};
	int disabled_ports_num = 0;
	int enabled_ports_num = 0;
	int idx;
	u32 rand;
	int i;
	int j;

	for (i = 0; i < num_ports; i++) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			enabled[enabled_ports_num++] = i;
		else
			disabled[disabled_ports_num++] = i;
	}

	/* Use native mapping by default where each port's buckets
	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
	 */
	for (i = 0; i < num_ports; i++)
		for (j = 0; j < buckets; j++) {
			idx = i * buckets + j;
			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
		}

	/* If all ports are disabled/enabled keep native mapping */
	if (enabled_ports_num == num_ports ||
	    disabled_ports_num == num_ports)
		return;

	/* Go over the disabled ports and for each assign a random active port */
	for (i = 0; i < disabled_ports_num; i++) {
		for (j = 0; j < buckets; j++) {
			get_random_bytes(&rand, 4);
			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
		}
	}
}

static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].has_drop)
			return true;
	return false;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	u8 disabled_ports[MLX5_MAX_PORTS] = {};
	struct mlx5_core_dev *dev;
	int disabled_index;
	int num_disabled;
	int err;
	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

	for (i = 0; i < num_disabled; i++) {
		disabled_index = disabled_ports[i];
		dev = ldev->pf[disabled_index].dev;
		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
								  MLX5_VPORT_UPLINK);
		if (!err)
			ldev->pf[disabled_index].has_drop = true;
		else
			mlx5_core_err(dev,
				      "Failed to create lag drop rule, error: %d", err);
	}
}

static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx;

	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x2);

	MLX5_SET(lagc, lag_ctx, active_port, ports);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

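/* Apply a new TX affinity mapping. In hash-based (port selection FT) mode
 * the steering tables are updated and, when the flow table bypass
 * capability is present, the active_port bitmap as well; otherwise fall
 * back to a plain MODIFY_LAG of the queue affinity fields.
 */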
static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 active_ports;
	int ret;

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
		ret = mlx5_lag_port_sel_modify(ldev, ports);
		if (ret ||
		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
			return ret;

		active_ports = lag_active_port_bits(ldev);

		return mlx5_cmd_modify_active_port(dev0, active_ports);
	}
	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int idx;
	int err;
	int i;
	int j;

	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

	for (i = 0; i < ldev->ports; i++) {
		for (j = 0; j < ldev->buckets; j++) {
			idx = i * ldev->buckets + j;
			if (ports[idx] == ldev->v2p_map[idx])
				continue;
			err = _mlx5_modify_lag(ldev, ports);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to modify LAG (%d)\n",
					      err);
				return;
			}
			memcpy(ldev->v2p_map, ports, sizeof(ports));

			mlx5_lag_print_mapping(dev0, ldev, tracker,
					       ldev->mode_flags);
			break;
		}
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->mode == MLX5_LAG_MODE_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
					   unsigned long *flags)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
		if (ldev->ports > 2)
			return -EINVAL;
		return 0;
	}

	if (ldev->ports > 2)
		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;

	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);

	return 0;
}

static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
						struct lag_tracker *tracker,
						enum mlx5_lag_mode mode,
						unsigned long *flags)
{
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (mode == MLX5_LAG_MODE_MPESW)
		return;

	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) {
		if (ldev->ports > 2)
			ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
	}
}

static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;

	*flags = 0;
	if (shared_fdb) {
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
	}

	if (mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	if (roce_lag)
		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);

	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
	return 0;
}

char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
	int port_sel_mode = get_port_sel_mode(mode, flags);

	switch (port_sel_mode) {
	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
	default: return "invalid";
	}
}

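/* Shared FDB: attach the eswitch of every port other than port 1 to the
 * FDB owned by port 1 (the master eswitch), unwinding on failure.
 */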
static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_eswitch *master_esw = dev0->priv.eswitch;
	int err;
	int i;

	for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) {
		struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;

		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
							       slave_esw, ldev->ports);
		if (err)
			goto err;
	}
	return 0;
err:
	for (; i > MLX5_LAG_P1; i--)
		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
							 ldev->pf[i].dev->priv.eswitch);
	return err;
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   enum mlx5_lag_mode mode,
			   unsigned long flags)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	if (tracker)
		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_lag_create_single_fdb(ldev);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	unsigned long flags = 0;
	int err;

	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;
}

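/* Tear the LAG down: detach any shared-FDB slave eswitches, destroy the
 * LAG object in firmware, and release port selection tables and drop rules.
 */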
int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_eswitch *master_esw = dev0->priv.eswitch;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	unsigned long flags = ldev->mode_flags;
	int err;
	int i;

	ldev->mode = MLX5_LAG_MODE_NONE;
	ldev->mode_flags = 0;
	mlx5_lag_mp_reset(ldev);

	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
		for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++)
			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
								 ldev->pf[i].dev->priv.eswitch);
		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
		mlx5_lag_port_sel_destroy(ldev);
		ldev->buckets = 1;
	}
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}

#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 4
bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
	struct mlx5_core_dev *dev;
	u8 mode;
#endif
	bool roce_support;
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			return false;

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++) {
		dev = ldev->pf[i].dev;
		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
			return false;
	}

	dev = ldev->pf[MLX5_LAG_P1].dev;
	mode = mlx5_eswitch_mode(dev);
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
			return false;

	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports > MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
		return false;
#else
	for (i = 0; i < ldev->ports; i++)
		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
			return false;
#endif
	roce_support = mlx5_get_roce_state(ldev->pf[MLX5_LAG_P1].dev);
	for (i = 1; i < ldev->ports; i++)
		if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
			return false;

	return true;
}

void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	bool roce_lag;
	int err;
	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		for (i = 1; i < ldev->ports; i++)
			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb)
		for (i = 0; i < ldev->ports; i++)
			if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
				mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
}

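/* Shared FDB is offered only when all ports are in switchdev mode with
 * vport metadata matching and the device exposes the cross-eswitch
 * capabilities this mode relies on.
 */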
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev;
	int i;

	for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) {
		dev = ldev->pf[i].dev;
		if (is_mdev_switchdev_mode(dev) &&
		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
			continue;
		return false;
	}

	dev = ldev->pf[MLX5_LAG_P1].dev;
	if (is_mdev_switchdev_mode(dev) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
		return true;

	return false;
}

static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
{
	bool roce_lag = true;
	int i;

	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);

#ifdef CONFIG_MLX5_ESWITCH
	for (i = 0; i < ldev->ports; i++)
		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
#endif

	return roce_lag;
}

static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	return !do_bond && __mlx5_lag_is_active(ldev) &&
	       ldev->mode != MLX5_LAG_MODE_MPESW;
}

static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct lag_tracker tracker = { };
	bool do_bond, roce_lag;
	int err;
	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
					MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			for (i = 1; i < ldev->ports; i++) {
				if (mlx5_get_roce_state(ldev->pf[i].dev))
					mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
			}
		} else if (shared_fdb) {
			int i;

			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			for (i = 0; i < ldev->ports; i++) {
				err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
				if (err)
					break;
			}

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				for (i = 0; i < ldev->ports; i++)
					mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if all of its physical ports are
	 * slaves of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);
	unsigned long flags;

	if (fn >= ldev->ports)
		return;

	spin_lock_irqsave(&lag_lock, flags);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock_irqrestore(&lag_lock, flags);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = mlx5_lag_dev(tmp_dev);

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
		mlx5_ldev_add_mdev(ldev, dev);
		return 0;
	}

	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		return -EAGAIN;
	}
	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

	if (!mlx5_lag_is_supported(dev))
		return;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	mlx5_dev_list_unlock();

	if (err) {
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);

	lag_is_active = __mlx5_lag_is_active(ldev);
	mutex_unlock(&ldev->lock);

	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].netdev)
			break;

	if (i >= ldev->ports)
		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev)
		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_mode_is_hash);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	bool res;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
	spin_unlock_irqrestore(&lag_lock, flags);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);

	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		for (i = 0; i < ldev->ports; i++)
			if (ldev->tracker.netdev_state[i].tx_enabled)
				ndev = ldev->pf[i].netdev;
		if (!ndev)
			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	u8 port = 0;
	int i;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == slave) {
			port = i;
			break;
		}
	}

	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return 0;

	return ldev->ports;
}
EXPORT_SYMBOL(mlx5_lag_get_num_ports);

struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int idx;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	if (*i == ldev->ports)
		goto unlock;
	for (idx = *i; idx < ldev->ports; idx++)
		if (ldev->pf[idx].dev != dev)
			break;

	if (idx == ldev->ports) {
		*i = idx;
		goto unlock;
	}
	*i = idx + 1;

	peer_dev = ldev->pf[idx].dev;

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	unsigned long flags;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = ldev->ports;
		for (i = 0; i < ldev->ports; i++)
			mdev[i] = ldev->pf[i].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);