1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3 
4 #include "eswitch.h"
5 #include "esw/qos.h"
6 #include "en/port.h"
7 #define CREATE_TRACE_POINTS
8 #include "diag/qos_tracepoint.h"
9 
10 /* Minimum supported BW share value by the HW is 1 Mbit/sec */
11 #define MLX5_MIN_BW_SHARE 1
12 
/* Divide @rate by @divider rounding up, raise the result to at least
 * MLX5_MIN_BW_SHARE and then cap it at @limit. Both comparisons are
 * performed on u32 values.
 */
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
15 
/* Driver-side state of one E-Switch rate group (a devlink rate node). */
struct mlx5_esw_rate_group {
	/* Scheduling element index filled in when the group TSAR is created */
	u32 tsar_ix;
	/* Last max rate successfully applied through esw_qos_set_group_max_rate() */
	u32 max_rate;
	/* Requested min rate; input to the group-level bw_share normalization */
	u32 min_rate;
	/* bw_share value last programmed for this group */
	u32 bw_share;
	/* Entry in the esw->qos.groups list */
	struct list_head list;
};
23 
24 static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
25 			       u32 parent_ix, u32 tsar_ix,
26 			       u32 max_rate, u32 bw_share)
27 {
28 	u32 bitmask = 0;
29 
30 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
31 		return -EOPNOTSUPP;
32 
33 	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
34 	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
35 	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
36 	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
37 	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
38 
39 	return mlx5_modify_scheduling_element_cmd(dev,
40 						  SCHEDULING_HIERARCHY_E_SWITCH,
41 						  sched_ctx,
42 						  tsar_ix,
43 						  bitmask);
44 }
45 
46 static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
47 				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
48 {
49 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
50 	struct mlx5_core_dev *dev = esw->dev;
51 	int err;
52 
53 	err = esw_qos_tsar_config(dev, sched_ctx,
54 				  esw->qos.root_tsar_ix, group->tsar_ix,
55 				  max_rate, bw_share);
56 	if (err)
57 		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
58 
59 	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);
60 
61 	return err;
62 }
63 
64 static int esw_qos_vport_config(struct mlx5_eswitch *esw,
65 				struct mlx5_vport *vport,
66 				u32 max_rate, u32 bw_share,
67 				struct netlink_ext_ack *extack)
68 {
69 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
70 	struct mlx5_esw_rate_group *group = vport->qos.group;
71 	struct mlx5_core_dev *dev = esw->dev;
72 	u32 parent_tsar_ix;
73 	void *vport_elem;
74 	int err;
75 
76 	if (!vport->qos.enabled)
77 		return -EIO;
78 
79 	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
80 	MLX5_SET(scheduling_context, sched_ctx, element_type,
81 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
82 	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
83 				  element_attributes);
84 	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
85 
86 	err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
87 				  max_rate, bw_share);
88 	if (err) {
89 		esw_warn(esw->dev,
90 			 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
91 			 vport->vport, err);
92 		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
93 		return err;
94 	}
95 
96 	trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
97 
98 	return 0;
99 }
100 
101 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
102 					      struct mlx5_esw_rate_group *group,
103 					      bool group_level)
104 {
105 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
106 	struct mlx5_vport *evport;
107 	u32 max_guarantee = 0;
108 	unsigned long i;
109 
110 	if (group_level) {
111 		struct mlx5_esw_rate_group *group;
112 
113 		list_for_each_entry(group, &esw->qos.groups, list) {
114 			if (group->min_rate < max_guarantee)
115 				continue;
116 			max_guarantee = group->min_rate;
117 		}
118 	} else {
119 		mlx5_esw_for_each_vport(esw, i, evport) {
120 			if (!evport->enabled || !evport->qos.enabled ||
121 			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
122 				continue;
123 			max_guarantee = evport->qos.min_rate;
124 		}
125 	}
126 
127 	if (max_guarantee)
128 		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
129 
130 	/* If vports min rate divider is 0 but their group has bw_share configured, then
131 	 * need to set bw_share for vports to minimal value.
132 	 */
133 	if (!group_level && !max_guarantee && group->bw_share)
134 		return 1;
135 	return 0;
136 }
137 
138 static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
139 {
140 	if (divider)
141 		return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
142 
143 	return 0;
144 }
145 
146 static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
147 					     struct mlx5_esw_rate_group *group,
148 					     struct netlink_ext_ack *extack)
149 {
150 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
151 	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
152 	struct mlx5_vport *evport;
153 	unsigned long i;
154 	u32 bw_share;
155 	int err;
156 
157 	mlx5_esw_for_each_vport(esw, i, evport) {
158 		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
159 			continue;
160 		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
161 
162 		if (bw_share == evport->qos.bw_share)
163 			continue;
164 
165 		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
166 		if (err)
167 			return err;
168 
169 		evport->qos.bw_share = bw_share;
170 	}
171 
172 	return 0;
173 }
174 
175 static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
176 					     struct netlink_ext_ack *extack)
177 {
178 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
179 	struct mlx5_esw_rate_group *group;
180 	u32 bw_share;
181 	int err;
182 
183 	list_for_each_entry(group, &esw->qos.groups, list) {
184 		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
185 
186 		if (bw_share == group->bw_share)
187 			continue;
188 
189 		err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
190 		if (err)
191 			return err;
192 
193 		group->bw_share = bw_share;
194 
195 		/* All the group's vports need to be set with default bw_share
196 		 * to enable them with QOS
197 		 */
198 		err = esw_qos_normalize_vports_min_rate(esw, group, extack);
199 
200 		if (err)
201 			return err;
202 	}
203 
204 	return 0;
205 }
206 
207 int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
208 				    struct mlx5_vport *evport,
209 				    u32 min_rate,
210 				    struct netlink_ext_ack *extack)
211 {
212 	u32 fw_max_bw_share, previous_min_rate;
213 	bool min_rate_supported;
214 	int err;
215 
216 	lockdep_assert_held(&esw->state_lock);
217 	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
218 	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
219 				fw_max_bw_share >= MLX5_MIN_BW_SHARE;
220 	if (min_rate && !min_rate_supported)
221 		return -EOPNOTSUPP;
222 	if (min_rate == evport->qos.min_rate)
223 		return 0;
224 
225 	previous_min_rate = evport->qos.min_rate;
226 	evport->qos.min_rate = min_rate;
227 	err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
228 	if (err)
229 		evport->qos.min_rate = previous_min_rate;
230 
231 	return err;
232 }
233 
234 int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
235 				    struct mlx5_vport *evport,
236 				    u32 max_rate,
237 				    struct netlink_ext_ack *extack)
238 {
239 	u32 act_max_rate = max_rate;
240 	bool max_rate_supported;
241 	int err;
242 
243 	lockdep_assert_held(&esw->state_lock);
244 	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
245 
246 	if (max_rate && !max_rate_supported)
247 		return -EOPNOTSUPP;
248 	if (max_rate == evport->qos.max_rate)
249 		return 0;
250 
251 	/* If parent group has rate limit need to set to group
252 	 * value when new max rate is 0.
253 	 */
254 	if (evport->qos.group && !max_rate)
255 		act_max_rate = evport->qos.group->max_rate;
256 
257 	err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
258 
259 	if (!err)
260 		evport->qos.max_rate = max_rate;
261 
262 	return err;
263 }
264 
265 static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
266 				      u32 min_rate, struct netlink_ext_ack *extack)
267 {
268 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
269 	struct mlx5_core_dev *dev = esw->dev;
270 	u32 previous_min_rate, divider;
271 	int err;
272 
273 	if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
274 		return -EOPNOTSUPP;
275 
276 	if (min_rate == group->min_rate)
277 		return 0;
278 
279 	previous_min_rate = group->min_rate;
280 	group->min_rate = min_rate;
281 	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
282 	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
283 	if (err) {
284 		group->min_rate = previous_min_rate;
285 		NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
286 
287 		/* Attempt restoring previous configuration */
288 		divider = esw_qos_calculate_min_rate_divider(esw, group, true);
289 		if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
290 			NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
291 	}
292 
293 	return err;
294 }
295 
296 static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
297 				      struct mlx5_esw_rate_group *group,
298 				      u32 max_rate, struct netlink_ext_ack *extack)
299 {
300 	struct mlx5_vport *vport;
301 	unsigned long i;
302 	int err;
303 
304 	if (group->max_rate == max_rate)
305 		return 0;
306 
307 	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
308 	if (err)
309 		return err;
310 
311 	group->max_rate = max_rate;
312 
313 	/* Any unlimited vports in the group should be set
314 	 * with the value of the group.
315 	 */
316 	mlx5_esw_for_each_vport(esw, i, vport) {
317 		if (!vport->enabled || !vport->qos.enabled ||
318 		    vport->qos.group != group || vport->qos.max_rate)
319 			continue;
320 
321 		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
322 		if (err)
323 			NL_SET_ERR_MSG_MOD(extack,
324 					   "E-Switch vport implicit rate limit setting failed");
325 	}
326 
327 	return err;
328 }
329 
330 static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
331 					      struct mlx5_vport *vport,
332 					      u32 max_rate, u32 bw_share)
333 {
334 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
335 	struct mlx5_esw_rate_group *group = vport->qos.group;
336 	struct mlx5_core_dev *dev = esw->dev;
337 	u32 parent_tsar_ix;
338 	void *vport_elem;
339 	int err;
340 
341 	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
342 	MLX5_SET(scheduling_context, sched_ctx, element_type,
343 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
344 	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
345 	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
346 	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
347 	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
348 	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
349 
350 	err = mlx5_create_scheduling_element_cmd(dev,
351 						 SCHEDULING_HIERARCHY_E_SWITCH,
352 						 sched_ctx,
353 						 &vport->qos.esw_tsar_ix);
354 	if (err) {
355 		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
356 			 vport->vport, err);
357 		return err;
358 	}
359 
360 	return 0;
361 }
362 
363 static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
364 						   struct mlx5_vport *vport,
365 						   struct mlx5_esw_rate_group *curr_group,
366 						   struct mlx5_esw_rate_group *new_group,
367 						   struct netlink_ext_ack *extack)
368 {
369 	u32 max_rate;
370 	int err;
371 
372 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
373 						  SCHEDULING_HIERARCHY_E_SWITCH,
374 						  vport->qos.esw_tsar_ix);
375 	if (err) {
376 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
377 		return err;
378 	}
379 
380 	vport->qos.group = new_group;
381 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
382 
383 	/* If vport is unlimited, we set the group's value.
384 	 * Therefore, if the group is limited it will apply to
385 	 * the vport as well and if not, vport will remain unlimited.
386 	 */
387 	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
388 	if (err) {
389 		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
390 		goto err_sched;
391 	}
392 
393 	return 0;
394 
395 err_sched:
396 	vport->qos.group = curr_group;
397 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
398 	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
399 		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
400 			 vport->vport);
401 
402 	return err;
403 }
404 
405 static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
406 				      struct mlx5_vport *vport,
407 				      struct mlx5_esw_rate_group *group,
408 				      struct netlink_ext_ack *extack)
409 {
410 	struct mlx5_esw_rate_group *new_group, *curr_group;
411 	int err;
412 
413 	if (!vport->enabled)
414 		return -EINVAL;
415 
416 	curr_group = vport->qos.group;
417 	new_group = group ?: esw->qos.group0;
418 	if (curr_group == new_group)
419 		return 0;
420 
421 	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
422 	if (err)
423 		return err;
424 
425 	/* Recalculate bw share weights of old and new groups */
426 	if (vport->qos.bw_share) {
427 		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
428 		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
429 	}
430 
431 	return 0;
432 }
433 
434 static struct mlx5_esw_rate_group *
435 esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
436 {
437 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
438 	struct mlx5_esw_rate_group *group;
439 	u32 divider;
440 	int err;
441 
442 	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
443 		return ERR_PTR(-EOPNOTSUPP);
444 
445 	group = kzalloc(sizeof(*group), GFP_KERNEL);
446 	if (!group)
447 		return ERR_PTR(-ENOMEM);
448 
449 	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
450 		 esw->qos.root_tsar_ix);
451 	err = mlx5_create_scheduling_element_cmd(esw->dev,
452 						 SCHEDULING_HIERARCHY_E_SWITCH,
453 						 tsar_ctx,
454 						 &group->tsar_ix);
455 	if (err) {
456 		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
457 		goto err_sched_elem;
458 	}
459 
460 	list_add_tail(&group->list, &esw->qos.groups);
461 
462 	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
463 	if (divider) {
464 		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
465 		if (err) {
466 			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
467 			goto err_min_rate;
468 		}
469 	}
470 	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
471 
472 	return group;
473 
474 err_min_rate:
475 	list_del(&group->list);
476 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
477 						  SCHEDULING_HIERARCHY_E_SWITCH,
478 						  group->tsar_ix);
479 	if (err)
480 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
481 err_sched_elem:
482 	kfree(group);
483 	return ERR_PTR(err);
484 }
485 
486 static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
487 				      struct mlx5_esw_rate_group *group,
488 				      struct netlink_ext_ack *extack)
489 {
490 	u32 divider;
491 	int err;
492 
493 	list_del(&group->list);
494 
495 	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
496 	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
497 	if (err)
498 		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
499 
500 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
501 						  SCHEDULING_HIERARCHY_E_SWITCH,
502 						  group->tsar_ix);
503 	if (err)
504 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
505 
506 	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
507 	kfree(group);
508 	return err;
509 }
510 
511 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
512 {
513 	switch (type) {
514 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
515 		return MLX5_CAP_QOS(dev, esw_element_type) &
516 		       ELEMENT_TYPE_CAP_MASK_TASR;
517 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
518 		return MLX5_CAP_QOS(dev, esw_element_type) &
519 		       ELEMENT_TYPE_CAP_MASK_VPORT;
520 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
521 		return MLX5_CAP_QOS(dev, esw_element_type) &
522 		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
523 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
524 		return MLX5_CAP_QOS(dev, esw_element_type) &
525 		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
526 	}
527 	return false;
528 }
529 
530 void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
531 {
532 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
533 	struct mlx5_core_dev *dev = esw->dev;
534 	__be32 *attr;
535 	int err;
536 
537 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
538 		return;
539 
540 	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
541 		return;
542 
543 	mutex_lock(&esw->state_lock);
544 	if (esw->qos.enabled)
545 		goto unlock;
546 
547 	MLX5_SET(scheduling_context, tsar_ctx, element_type,
548 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
549 
550 	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
551 	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
552 
553 	err = mlx5_create_scheduling_element_cmd(dev,
554 						 SCHEDULING_HIERARCHY_E_SWITCH,
555 						 tsar_ctx,
556 						 &esw->qos.root_tsar_ix);
557 	if (err) {
558 		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
559 		goto unlock;
560 	}
561 
562 	INIT_LIST_HEAD(&esw->qos.groups);
563 	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
564 		esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
565 		if (IS_ERR(esw->qos.group0)) {
566 			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
567 				 PTR_ERR(esw->qos.group0));
568 			goto err_group0;
569 		}
570 	}
571 	esw->qos.enabled = true;
572 unlock:
573 	mutex_unlock(&esw->state_lock);
574 	return;
575 
576 err_group0:
577 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
578 						  SCHEDULING_HIERARCHY_E_SWITCH,
579 						  esw->qos.root_tsar_ix);
580 	if (err)
581 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
582 	mutex_unlock(&esw->state_lock);
583 }
584 
585 void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
586 {
587 	struct devlink *devlink = priv_to_devlink(esw->dev);
588 	int err;
589 
590 	devlink_rate_nodes_destroy(devlink);
591 	mutex_lock(&esw->state_lock);
592 	if (!esw->qos.enabled)
593 		goto unlock;
594 
595 	if (esw->qos.group0)
596 		esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
597 
598 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
599 						  SCHEDULING_HIERARCHY_E_SWITCH,
600 						  esw->qos.root_tsar_ix);
601 	if (err)
602 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
603 
604 	esw->qos.enabled = false;
605 unlock:
606 	mutex_unlock(&esw->state_lock);
607 }
608 
609 int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
610 			      u32 max_rate, u32 bw_share)
611 {
612 	int err;
613 
614 	lockdep_assert_held(&esw->state_lock);
615 	if (!esw->qos.enabled)
616 		return 0;
617 
618 	if (vport->qos.enabled)
619 		return -EEXIST;
620 
621 	vport->qos.group = esw->qos.group0;
622 
623 	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
624 	if (!err) {
625 		vport->qos.enabled = true;
626 		trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
627 	}
628 
629 	return err;
630 }
631 
632 void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
633 {
634 	int err;
635 
636 	lockdep_assert_held(&esw->state_lock);
637 	if (!esw->qos.enabled || !vport->qos.enabled)
638 		return;
639 	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
640 	     "Disabling QoS on port before detaching it from group");
641 
642 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
643 						  SCHEDULING_HIERARCHY_E_SWITCH,
644 						  vport->qos.esw_tsar_ix);
645 	if (err)
646 		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
647 			 vport->vport, err);
648 
649 	vport->qos.enabled = false;
650 	trace_mlx5_esw_vport_qos_destroy(vport);
651 }
652 
653 int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
654 {
655 	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
656 	struct mlx5_vport *vport;
657 	u32 bitmask;
658 
659 	vport = mlx5_eswitch_get_vport(esw, vport_num);
660 	if (IS_ERR(vport))
661 		return PTR_ERR(vport);
662 
663 	if (!vport->qos.enabled)
664 		return -EOPNOTSUPP;
665 
666 	MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
667 	bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
668 
669 	return mlx5_modify_scheduling_element_cmd(esw->dev,
670 						  SCHEDULING_HIERARCHY_E_SWITCH,
671 						  ctx,
672 						  vport->qos.esw_tsar_ix,
673 						  bitmask);
674 }
675 
676 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
677 
678 /* Converts bytes per second value passed in a pointer into megabits per
679  * second, rewriting last. If converted rate exceed link speed or is not a
680  * fraction of Mbps - returns error.
681  */
682 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
683 					u64 *rate, struct netlink_ext_ack *extack)
684 {
685 	u32 link_speed_max, reminder;
686 	u64 value;
687 	int err;
688 
689 	err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
690 	if (err) {
691 		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
692 		return err;
693 	}
694 
695 	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
696 	if (reminder) {
697 		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
698 		       name, *rate);
699 		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
700 		return -EINVAL;
701 	}
702 
703 	if (value > link_speed_max) {
704 		pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
705 		       name, value, link_speed_max);
706 		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
707 		return -EINVAL;
708 	}
709 
710 	*rate = value;
711 	return 0;
712 }
713 
714 /* Eswitch devlink rate API */
715 
716 int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
717 					    u64 tx_share, struct netlink_ext_ack *extack)
718 {
719 	struct mlx5_vport *vport = priv;
720 	struct mlx5_eswitch *esw;
721 	int err;
722 
723 	esw = vport->dev->priv.eswitch;
724 	if (!mlx5_esw_allowed(esw))
725 		return -EPERM;
726 
727 	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
728 	if (err)
729 		return err;
730 
731 	mutex_lock(&esw->state_lock);
732 	err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
733 	mutex_unlock(&esw->state_lock);
734 	return err;
735 }
736 
737 int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
738 					  u64 tx_max, struct netlink_ext_ack *extack)
739 {
740 	struct mlx5_vport *vport = priv;
741 	struct mlx5_eswitch *esw;
742 	int err;
743 
744 	esw = vport->dev->priv.eswitch;
745 	if (!mlx5_esw_allowed(esw))
746 		return -EPERM;
747 
748 	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
749 	if (err)
750 		return err;
751 
752 	mutex_lock(&esw->state_lock);
753 	err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
754 	mutex_unlock(&esw->state_lock);
755 	return err;
756 }
757 
758 int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
759 					    u64 tx_share, struct netlink_ext_ack *extack)
760 {
761 	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
762 	struct mlx5_eswitch *esw = dev->priv.eswitch;
763 	struct mlx5_esw_rate_group *group = priv;
764 	int err;
765 
766 	err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
767 	if (err)
768 		return err;
769 
770 	mutex_lock(&esw->state_lock);
771 	err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
772 	mutex_unlock(&esw->state_lock);
773 	return err;
774 }
775 
776 int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
777 					  u64 tx_max, struct netlink_ext_ack *extack)
778 {
779 	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
780 	struct mlx5_eswitch *esw = dev->priv.eswitch;
781 	struct mlx5_esw_rate_group *group = priv;
782 	int err;
783 
784 	err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
785 	if (err)
786 		return err;
787 
788 	mutex_lock(&esw->state_lock);
789 	err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
790 	mutex_unlock(&esw->state_lock);
791 	return err;
792 }
793 
794 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
795 				   struct netlink_ext_ack *extack)
796 {
797 	struct mlx5_esw_rate_group *group;
798 	struct mlx5_eswitch *esw;
799 	int err = 0;
800 
801 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
802 	if (IS_ERR(esw))
803 		return PTR_ERR(esw);
804 
805 	mutex_lock(&esw->state_lock);
806 	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
807 		NL_SET_ERR_MSG_MOD(extack,
808 				   "Rate node creation supported only in switchdev mode");
809 		err = -EOPNOTSUPP;
810 		goto unlock;
811 	}
812 
813 	group = esw_qos_create_rate_group(esw, extack);
814 	if (IS_ERR(group)) {
815 		err = PTR_ERR(group);
816 		goto unlock;
817 	}
818 
819 	*priv = group;
820 unlock:
821 	mutex_unlock(&esw->state_lock);
822 	return err;
823 }
824 
825 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
826 				   struct netlink_ext_ack *extack)
827 {
828 	struct mlx5_esw_rate_group *group = priv;
829 	struct mlx5_eswitch *esw;
830 	int err;
831 
832 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
833 	if (IS_ERR(esw))
834 		return PTR_ERR(esw);
835 
836 	mutex_lock(&esw->state_lock);
837 	err = esw_qos_destroy_rate_group(esw, group, extack);
838 	mutex_unlock(&esw->state_lock);
839 	return err;
840 }
841 
842 int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
843 				    struct mlx5_vport *vport,
844 				    struct mlx5_esw_rate_group *group,
845 				    struct netlink_ext_ack *extack)
846 {
847 	int err;
848 
849 	mutex_lock(&esw->state_lock);
850 	err = esw_qos_vport_update_group(esw, vport, group, extack);
851 	mutex_unlock(&esw->state_lock);
852 	return err;
853 }
854 
855 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
856 				     struct devlink_rate *parent,
857 				     void *priv, void *parent_priv,
858 				     struct netlink_ext_ack *extack)
859 {
860 	struct mlx5_esw_rate_group *group;
861 	struct mlx5_vport *vport = priv;
862 
863 	if (!parent)
864 		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
865 						       vport, NULL, extack);
866 
867 	group = parent_priv;
868 	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
869 }
870