/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <net/bonding.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "lag.h"
#include "mp.h"

enum {
	MLX5_LAG_EGRESS_PORT_1 = 1,
	MLX5_LAG_EGRESS_PORT_2,
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

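/* Issue the CREATE_LAG firmware command. In queue-affinity mode the tx
 * remap ports are programmed directly; in hash-based mode the port
 * selection flow table decides the egress port, so only the mode is set.
 * fdb_selection_mode reflects whether a shared FDB was requested.
 */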
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2, bool shared_fdb, u8 flags)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
	if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) {
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
	} else {
		MLX5_SET(lagc, lag_ctx, port_select_mode,
			 MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT);
	}

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

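/* Map the two virtual ports to physical egress ports based on the
 * tracked netdev state: a port is usable only if it is both tx_enabled
 * and link_up. With both (or neither) usable the default 1:1 mapping is
 * kept; otherwise all traffic is steered to the single usable port.
 */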
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	bool p1en;
	bool p2en;

	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P1].link_up;

	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P2].link_up;

	*port1 = MLX5_LAG_EGRESS_PORT_1;
	*port2 = MLX5_LAG_EGRESS_PORT_2;
	if ((!p1en && !p2en) || (p1en && p2en))
		return;

	if (p1en)
		*port2 = MLX5_LAG_EGRESS_PORT_1;
	else
		*port1 = MLX5_LAG_EGRESS_PORT_2;
}

static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
	return ldev->pf[MLX5_LAG_P1].has_drop || ldev->pf[MLX5_LAG_P2].has_drop;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].has_drop)
			continue;

		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
							     MLX5_VPORT_UPLINK);
		ldev->pf[i].has_drop = false;
	}
}

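/* For active-backup bonds that currently have an inactive slave, install
 * an ingress drop rule on the uplink vport of the inactive port. The
 * previous drop rule (if any) is removed first so no packets are dropped
 * on the active port while the mapping is being updated.
 */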
static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct mlx5_core_dev *inactive;
	u8 v2p_port1, v2p_port2;
	int inactive_idx;
	int err;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
	 */
	mlx5_lag_drop_rule_cleanup(ldev);

	if (!ldev->tracker.has_inactive)
		return;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, &v2p_port2);

	if (v2p_port1 == MLX5_LAG_EGRESS_PORT_1) {
		inactive = dev1;
		inactive_idx = MLX5_LAG_P2;
	} else {
		inactive = dev0;
		inactive_idx = MLX5_LAG_P1;
	}

	err = mlx5_esw_acl_ingress_vport_drop_rule_create(inactive->priv.eswitch,
							  MLX5_VPORT_UPLINK);
	if (!err)
		ldev->pf[inactive_idx].has_drop = true;
	else
		mlx5_core_err(inactive,
			      "Failed to create lag drop rule, error: %d", err);
}

static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED)
		return mlx5_lag_port_sel_modify(ldev, v2p_port1, v2p_port2);
	return mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
}

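/* Recompute the tx affinity mapping from the tracker and, if it changed,
 * push it to hardware (via the port selection tables in hash-based mode,
 * or MODIFY_LAG otherwise). The inactive-port drop rule is then refreshed
 * for non-RoCE active-backup LAGs.
 */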
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		err = _mlx5_modify_lag(ldev, v2p_port1, v2p_port2);
		if (err) {
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
			return;
		}
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !(ldev->flags & MLX5_LAG_FLAG_ROCE))
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

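/* Prefer hash-based port selection (a steering flow table picks the
 * egress port) when the device supports port_select_flow_table, the bond
 * uses hash tx and this is not a RoCE LAG; otherwise stay in
 * queue-affinity mode.
 */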
static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
				       struct lag_tracker *tracker, u8 *flags)
{
	bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

	if (roce_lag ||
	    !MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) ||
	    tracker->tx_type != NETDEV_LAG_TX_TYPE_HASH)
		return;
	*flags |= MLX5_LAG_FLAG_HASH_BASED;
}

static char *get_str_port_sel_mode(u8 flags)
{
	if (flags & MLX5_LAG_FLAG_HASH_BASED)
		return "hash";
	return "queue_affinity";
}

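/* Create the hardware LAG object. When a shared FDB was requested, the
 * two eswitches are also switched to a single FDB; if that fails the
 * newly created LAG object is destroyed again.
 */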
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   bool shared_fdb, u8 flags)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d mode:%s",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
		       shared_fdb, get_str_port_sel_mode(flags));

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

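/* Activate LAG in the requested mode: compute the initial port mapping,
 * optionally create the port selection flow tables for hash-based mode,
 * create the LAG object and, for non-RoCE active-backup bonds, set up
 * the inactive-port drop rule.
 */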
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags,
		      bool shared_fdb)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);
	mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
	if (flags & MLX5_LAG_FLAG_HASH_BASED) {
		err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
					       ldev->v2p_map[MLX5_LAG_P1],
					       ldev->v2p_map[MLX5_LAG_P2]);
		if (err) {
			mlx5_core_err(dev0,
				      "Failed to create LAG port selection(%d)\n",
				      err);
			return err;
		}
	}

	err = mlx5_create_lag(ldev, tracker, shared_fdb, flags);
	if (err) {
		if (flags & MLX5_LAG_FLAG_HASH_BASED)
			mlx5_lag_port_sel_destroy(ldev);
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    !roce_lag)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->flags |= flags;
	ldev->shared_fdb = shared_fdb;
	return 0;
}

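/* Tear down the LAG: clear the mode flags, reset multipath and shared
 * FDB state, destroy the LAG object in firmware and release the port
 * selection tables and drop rules that were created for it.
 */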
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	u8 flags = ldev->flags;
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
	mlx5_lag_mp_reset(ldev);

	if (ldev->shared_fdb) {
		mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
							 dev1->priv.eswitch);
		ldev->shared_fdb = false;
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	if (flags & MLX5_LAG_FLAG_HASH_BASED)
		mlx5_lag_port_sel_destroy(ldev);
	if (mlx5_lag_has_drop_rule(ldev))
		mlx5_lag_drop_rule_cleanup(ldev);

	return 0;
}

static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		mlx5_nic_vport_disable_roce(dev1);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

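/* A shared FDB can be used only when both devices are in switchdev mode
 * with vport match metadata enabled, their eswitches are paired over
 * devcom, and the firmware exposes the native FDB selection, root FT on
 * other eswitch and shared ingress ACL capabilities.
 */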
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

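/* Reconcile the hardware LAG state with the tracked bond state: activate
 * LAG (RoCE LAG when neither SR-IOV nor an eswitch mode is in use,
 * otherwise VF LAG, with a shared FDB when supported), modify the port
 * mapping of an already active LAG, or disable LAG when the bond is gone.
 */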
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag = roce_lag &&
			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
						   MLX5_LAG_FLAG_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
				    struct mlx5_core_dev *dev1)
{
	if (dev0)
		mlx5_esw_lock(dev0->priv.eswitch);
	if (dev1)
		mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
				      struct mlx5_core_dev *dev1)
{
	if (dev1)
		mlx5_esw_unlock(dev1->priv.eswitch);
	if (dev0)
		mlx5_esw_unlock(dev0->priv.eswitch);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
	mlx5_lag_unlock_eswitches(dev0, dev1);
	mlx5_dev_list_unlock();
}

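/* Handle a CHANGEUPPER notification: walk the bond's slaves, record which
 * of our netdevs are enslaved and whether any slave is inactive, and
 * update the tracker's bonding state. Returns nonzero when the tracked
 * state changed and the bond work should be scheduled.
 */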
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
	int bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
					    struct lag_tracker *tracker,
					    struct net_device *ndev)
{
	struct net_device *ndev_tmp;
	struct slave *slave;
	bool has_inactive = 0;
	int idx;

	if (!netif_is_lag_master(ndev))
		return 0;

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx < 0)
			continue;

		slave = bond_slave_get_rcu(ndev_tmp);
		if (slave)
			has_inactive |= bond_is_slave_inactive(slave);
	}
	rcu_read_unlock();

	if (tracker->has_inactive == has_inactive)
		return 0;

	tracker->has_inactive = has_inactive;

	return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if (event != NETDEV_CHANGEUPPER &&
	    event != NETDEV_CHANGELOWERSTATE &&
	    event != NETDEV_CHANGEINFODATA)
		return NOTIFY_DONE;

	ldev    = container_of(this, struct mlx5_lag, nb);

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	case NETDEV_CHANGEINFODATA:
		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= MLX5_MAX_PORTS)
		return;

	spin_lock(&lag_lock);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	int i;

	spin_lock(&lag_lock);
	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = mlx5_get_dev_index(dev);

	if (fn >= MLX5_MAX_PORTS)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return 0;

	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
	} else {
		if (ldev->mode_changes_in_progress)
			return -EAGAIN;
		mlx5_ldev_get(ldev);
	}

	mlx5_ldev_add_mdev(ldev, dev);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

recheck:
	mlx5_dev_list_lock();
	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mlx5_dev_list_unlock();
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	if (err) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	if (__mlx5_lag_is_active(ldev))
		mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
		dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *dev0;
	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();

	dev0 = ldev->pf[MLX5_LAG_P1].dev;
	dev1 = ldev->pf[MLX5_LAG_P2].dev;

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		mlx5_lag_lock_eswitches(dev0, dev1);
		mlx5_disable_lag(ldev);
		mlx5_lag_unlock_eswitches(dev0, dev1);
	}
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	ldev->mode_changes_in_progress--;
	mlx5_dev_list_unlock();
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
			   ldev->pf[MLX5_LAG_P2].dev :
			   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock(&lag_lock);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

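/* Query congestion statistics and accumulate them per offset into
 * @values. When LAG is active the counters of both ports are summed;
 * otherwise only the given device is queried.
 */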
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);