1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3 
4 #include "eswitch.h"
5 #include "esw/qos.h"
6 #include "en/port.h"
7 #define CREATE_TRACE_POINTS
8 #include "diag/qos_tracepoint.h"
9 
/* Minimum BW share value supported by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Turn @rate into a bw_share weight: divide by @divider rounding up, raise
 * the result to at least MLX5_MIN_BW_SHARE and finally cap it at @limit.
 */
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit)				\
	min_t(u32,								\
	      max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE),	\
	      limit)
15 
16 struct mlx5_esw_rate_group {
17 	u32 tsar_ix;
18 	u32 max_rate;
19 	u32 min_rate;
20 	u32 bw_share;
21 	struct list_head list;
22 };
23 
/* Program max_average_bw and bw_share of the scheduling element @tsar_ix.
 *
 * @sched_ctx is a caller-provided scheduling_context buffer; both fields are
 * written into it before the modify command is issued.
 *
 * Returns -EOPNOTSUPP when the device lacks the qos or esw_scheduling
 * capability, otherwise the result of the modify command.
 */
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
			       u32 tsar_ix, u32 max_rate, u32 bw_share)
{
	/* Both attributes are always modified together. */
	const u32 modify_mask =
		MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW |
		MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	if (!(MLX5_CAP_GEN(dev, qos) && MLX5_CAP_QOS(dev, esw_scheduling)))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);

	return mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx, tsar_ix, modify_mask);
}
43 
/* Apply @max_rate and @bw_share to the scheduling element of @group.
 *
 * On failure an extack message is set. The software copy of the group's
 * values is not touched here; callers update it after a successful return.
 */
static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int ret;

	ret = esw_qos_tsar_config(dev, sched_ctx, group->tsar_ix, max_rate, bw_share);
	if (ret)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

	/* The tracepoint fires whether or not the command succeeded. */
	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);

	return ret;
}
61 
/* Apply @max_rate and @bw_share to the scheduling element of @vport.
 *
 * Returns -EIO when QoS is not enabled on the vport. A failing modify
 * command is logged, reported through @extack and its error returned.
 * The tracepoint is emitted on success only.
 */
static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	int ret;

	if (!vport->qos.enabled)
		return -EIO;

	ret = esw_qos_tsar_config(esw->dev, sched_ctx, vport->qos.esw_tsar_ix,
				  max_rate, bw_share);
	if (!ret) {
		trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
		return 0;
	}

	esw_warn(esw->dev,
		 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
		 vport->vport, ret);
	NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");

	return ret;
}
88 
esw_qos_calculate_min_rate_divider(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,bool group_level)89 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
90 					      struct mlx5_esw_rate_group *group,
91 					      bool group_level)
92 {
93 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
94 	struct mlx5_vport *evport;
95 	u32 max_guarantee = 0;
96 	unsigned long i;
97 
98 	if (group_level) {
99 		struct mlx5_esw_rate_group *group;
100 
101 		list_for_each_entry(group, &esw->qos.groups, list) {
102 			if (group->min_rate < max_guarantee)
103 				continue;
104 			max_guarantee = group->min_rate;
105 		}
106 	} else {
107 		mlx5_esw_for_each_vport(esw, i, evport) {
108 			if (!evport->enabled || !evport->qos.enabled ||
109 			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
110 				continue;
111 			max_guarantee = evport->qos.min_rate;
112 		}
113 	}
114 
115 	if (max_guarantee)
116 		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
117 
118 	/* If vports min rate divider is 0 but their group has bw_share configured, then
119 	 * need to set bw_share for vports to minimal value.
120 	 */
121 	if (!group_level && !max_guarantee && group && group->bw_share)
122 		return 1;
123 	return 0;
124 }
125 
/* Normalize @min_rate into a bw_share value. A zero @divider yields 0;
 * otherwise the result comes from MLX5_RATE_TO_BW_SHARE() with @fw_max as
 * the upper limit.
 */
static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	return divider ? MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max) : 0;
}
133 
esw_qos_normalize_vports_min_rate(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)134 static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
135 					     struct mlx5_esw_rate_group *group,
136 					     struct netlink_ext_ack *extack)
137 {
138 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
139 	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
140 	struct mlx5_vport *evport;
141 	unsigned long i;
142 	u32 bw_share;
143 	int err;
144 
145 	mlx5_esw_for_each_vport(esw, i, evport) {
146 		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
147 			continue;
148 		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
149 
150 		if (bw_share == evport->qos.bw_share)
151 			continue;
152 
153 		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
154 		if (err)
155 			return err;
156 
157 		evport->qos.bw_share = bw_share;
158 	}
159 
160 	return 0;
161 }
162 
/* Recompute the bw_share of every group on esw->qos.groups using @divider.
 * Each group whose value changed is reprogrammed and its vports are then
 * renormalized. Returns the first error encountered, 0 otherwise.
 */
static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_rate_group *group;

	list_for_each_entry(group, &esw->qos.groups, list) {
		u32 new_share = esw_qos_calc_bw_share(group->min_rate, divider,
						      fw_max_bw_share);
		int ret;

		if (new_share == group->bw_share)
			continue;

		ret = esw_qos_group_config(esw, group, group->max_rate, new_share, extack);
		if (!ret) {
			group->bw_share = new_share;

			/* All the group's vports need to be set with default
			 * bw_share to enable them with QOS
			 */
			ret = esw_qos_normalize_vports_min_rate(esw, group, extack);
		}
		if (ret)
			return ret;
	}

	return 0;
}
194 
/* Set the min rate of @evport and renormalize the vports of its group.
 * Must be called with esw->state_lock held.
 *
 * Returns -EOPNOTSUPP for a non-zero @min_rate when the device lacks
 * esw_bw_share or reports max_tsar_bw_share below MLX5_MIN_BW_SHARE.
 * If normalization fails, the stored min rate is rolled back and the
 * error is returned.
 */
static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share, old_min_rate;
	bool supported;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
		    fw_max_bw_share >= MLX5_MIN_BW_SHARE;
	if (min_rate && !supported)
		return -EOPNOTSUPP;

	old_min_rate = evport->qos.min_rate;
	if (min_rate == old_min_rate)
		return 0;

	evport->qos.min_rate = min_rate;
	ret = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
	if (ret)
		evport->qos.min_rate = old_min_rate;

	return ret;
}
219 
/* Set the max rate of @evport. Must be called with esw->state_lock held.
 *
 * Returns -EOPNOTSUPP for a non-zero @max_rate when the device lacks
 * esw_rate_limit. The stored max rate is updated only when the vport was
 * configured successfully.
 */
static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	u32 effective_rate;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	if (max_rate && !MLX5_CAP_QOS(esw->dev, esw_rate_limit))
		return -EOPNOTSUPP;
	if (max_rate == evport->qos.max_rate)
		return 0;

	/* If parent group has rate limit need to set to group
	 * value when new max rate is 0.
	 */
	if (!max_rate && evport->qos.group)
		effective_rate = evport->qos.group->max_rate;
	else
		effective_rate = max_rate;

	ret = esw_qos_vport_config(esw, evport, effective_rate, evport->qos.bw_share, extack);
	if (ret)
		return ret;

	/* Remember what was requested, not the value inherited from the group. */
	evport->qos.max_rate = max_rate;

	return 0;
}
248 
/* Set the min rate of @group and renormalize the bw_share of all groups.
 *
 * Returns -EOPNOTSUPP when the device lacks esw_bw_share or reports
 * max_tsar_bw_share below MLX5_MIN_BW_SHARE. If normalization fails, the
 * previous min rate is put back, a second normalization pass attempts to
 * restore the earlier configuration, and the original error is returned.
 */
static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 old_min_rate = group->min_rate;
	u32 divider;
	int ret;

	if (!MLX5_CAP_QOS(esw->dev, esw_bw_share) || fw_max_bw_share < MLX5_MIN_BW_SHARE)
		return -EOPNOTSUPP;

	if (min_rate == old_min_rate)
		return 0;

	group->min_rate = min_rate;
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	ret = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (!ret)
		return 0;

	group->min_rate = old_min_rate;
	NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

	/* Attempt restoring previous configuration */
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");

	return ret;
}
279 
/* Set the max rate of @group and propagate it to the group's vports that
 * have no max rate of their own.
 *
 * A failure to configure the group itself is returned immediately. Failures
 * on individual vports only set an extack message and do not stop the loop.
 */
static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport;
	unsigned long i;
	int ret;

	if (max_rate == group->max_rate)
		return 0;

	ret = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
	if (ret)
		return ret;

	group->max_rate = max_rate;

	/* Any unlimited vports in the group should be set
	 * with the value of the group.
	 */
	mlx5_esw_for_each_vport(esw, i, vport) {
		bool member = vport->enabled && vport->qos.enabled &&
			      vport->qos.group == group;

		if (!member || vport->qos.max_rate)
			continue;

		/* NOTE(review): ret is overwritten for every vport configured
		 * here, so the value returned reflects only the last one.
		 */
		ret = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
		if (ret)
			NL_SET_ERR_MSG_MOD(extack,
					   "E-Switch vport implicit rate limit setting failed");
	}

	return ret;
}
313 
esw_qos_element_type_supported(struct mlx5_core_dev * dev,int type)314 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
315 {
316 	switch (type) {
317 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
318 		return MLX5_CAP_QOS(dev, esw_element_type) &
319 		       ELEMENT_TYPE_CAP_MASK_TSAR;
320 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
321 		return MLX5_CAP_QOS(dev, esw_element_type) &
322 		       ELEMENT_TYPE_CAP_MASK_VPORT;
323 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
324 		return MLX5_CAP_QOS(dev, esw_element_type) &
325 		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
326 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
327 		return MLX5_CAP_QOS(dev, esw_element_type) &
328 		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
329 	}
330 	return false;
331 }
332 
/* Create the vport scheduling element of @vport with the given @max_rate and
 * @bw_share. The element is parented to the TSAR of vport->qos.group when a
 * group is set, and to the root TSAR otherwise. The new element index is
 * stored in vport->qos.esw_tsar_ix.
 *
 * Returns -EOPNOTSUPP when vport elements are not supported, otherwise the
 * result of the create command (failures are logged).
 */
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	u32 parent_tsar_ix = esw->qos.root_tsar_ix;
	void *vport_elem;
	int ret;

	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
		return -EOPNOTSUPP;

	if (vport->qos.group)
		parent_tsar_ix = vport->qos.group->tsar_ix;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);

	ret = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx, &vport->qos.esw_tsar_ix);
	if (ret)
		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, ret);

	return ret;
}
368 
esw_qos_update_group_scheduling_element(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * curr_group,struct mlx5_esw_rate_group * new_group,struct netlink_ext_ack * extack)369 static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
370 						   struct mlx5_vport *vport,
371 						   struct mlx5_esw_rate_group *curr_group,
372 						   struct mlx5_esw_rate_group *new_group,
373 						   struct netlink_ext_ack *extack)
374 {
375 	u32 max_rate;
376 	int err;
377 
378 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
379 						  SCHEDULING_HIERARCHY_E_SWITCH,
380 						  vport->qos.esw_tsar_ix);
381 	if (err) {
382 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
383 		return err;
384 	}
385 
386 	vport->qos.group = new_group;
387 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
388 
389 	/* If vport is unlimited, we set the group's value.
390 	 * Therefore, if the group is limited it will apply to
391 	 * the vport as well and if not, vport will remain unlimited.
392 	 */
393 	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
394 	if (err) {
395 		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
396 		goto err_sched;
397 	}
398 
399 	return 0;
400 
401 err_sched:
402 	vport->qos.group = curr_group;
403 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
404 	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
405 		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
406 			 vport->vport);
407 
408 	return err;
409 }
410 
esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)411 static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
412 				      struct mlx5_vport *vport,
413 				      struct mlx5_esw_rate_group *group,
414 				      struct netlink_ext_ack *extack)
415 {
416 	struct mlx5_esw_rate_group *new_group, *curr_group;
417 	int err;
418 
419 	if (!vport->enabled)
420 		return -EINVAL;
421 
422 	curr_group = vport->qos.group;
423 	new_group = group ?: esw->qos.group0;
424 	if (curr_group == new_group)
425 		return 0;
426 
427 	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
428 	if (err)
429 		return err;
430 
431 	/* Recalculate bw share weights of old and new groups */
432 	if (vport->qos.bw_share || new_group->bw_share) {
433 		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
434 		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
435 	}
436 
437 	return 0;
438 }
439 
440 static struct mlx5_esw_rate_group *
__esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)441 __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
442 {
443 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
444 	struct mlx5_esw_rate_group *group;
445 	__be32 *attr;
446 	u32 divider;
447 	int err;
448 
449 	group = kzalloc(sizeof(*group), GFP_KERNEL);
450 	if (!group)
451 		return ERR_PTR(-ENOMEM);
452 
453 	MLX5_SET(scheduling_context, tsar_ctx, element_type,
454 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
455 
456 	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
457 	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
458 
459 	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
460 		 esw->qos.root_tsar_ix);
461 	err = mlx5_create_scheduling_element_cmd(esw->dev,
462 						 SCHEDULING_HIERARCHY_E_SWITCH,
463 						 tsar_ctx,
464 						 &group->tsar_ix);
465 	if (err) {
466 		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
467 		goto err_sched_elem;
468 	}
469 
470 	list_add_tail(&group->list, &esw->qos.groups);
471 
472 	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
473 	if (divider) {
474 		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
475 		if (err) {
476 			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
477 			goto err_min_rate;
478 		}
479 	}
480 	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
481 
482 	return group;
483 
484 err_min_rate:
485 	list_del(&group->list);
486 	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
487 						SCHEDULING_HIERARCHY_E_SWITCH,
488 						group->tsar_ix))
489 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
490 err_sched_elem:
491 	kfree(group);
492 	return ERR_PTR(err);
493 }
494 
/* Forward declarations: esw_qos_create_rate_group() below takes and drops a
 * QoS reference before these two helpers are defined.
 */
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);
497 
498 static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)499 esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
500 {
501 	struct mlx5_esw_rate_group *group;
502 	int err;
503 
504 	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
505 		return ERR_PTR(-EOPNOTSUPP);
506 
507 	err = esw_qos_get(esw, extack);
508 	if (err)
509 		return ERR_PTR(err);
510 
511 	group = __esw_qos_create_rate_group(esw, extack);
512 	if (IS_ERR(group))
513 		esw_qos_put(esw);
514 
515 	return group;
516 }
517 
__esw_qos_destroy_rate_group(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)518 static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
519 					struct mlx5_esw_rate_group *group,
520 					struct netlink_ext_ack *extack)
521 {
522 	u32 divider;
523 	int err;
524 
525 	list_del(&group->list);
526 
527 	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
528 	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
529 	if (err)
530 		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
531 
532 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
533 						  SCHEDULING_HIERARCHY_E_SWITCH,
534 						  group->tsar_ix);
535 	if (err)
536 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
537 
538 	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
539 
540 	kfree(group);
541 
542 	return err;
543 }
544 
/* Destroy @group and drop one QoS reference. The reference is dropped even
 * when the destroy reports an error, which is then returned.
 */
static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	int ret = __esw_qos_destroy_rate_group(esw, group, extack);

	esw_qos_put(esw);

	return ret;
}
556 
esw_qos_create(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)557 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
558 {
559 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
560 	struct mlx5_core_dev *dev = esw->dev;
561 	__be32 *attr;
562 	int err;
563 
564 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
565 		return -EOPNOTSUPP;
566 
567 	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
568 	    !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
569 		return -EOPNOTSUPP;
570 
571 	MLX5_SET(scheduling_context, tsar_ctx, element_type,
572 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
573 
574 	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
575 	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
576 
577 	err = mlx5_create_scheduling_element_cmd(dev,
578 						 SCHEDULING_HIERARCHY_E_SWITCH,
579 						 tsar_ctx,
580 						 &esw->qos.root_tsar_ix);
581 	if (err) {
582 		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
583 		return err;
584 	}
585 
586 	INIT_LIST_HEAD(&esw->qos.groups);
587 	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
588 		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
589 		if (IS_ERR(esw->qos.group0)) {
590 			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
591 				 PTR_ERR(esw->qos.group0));
592 			err = PTR_ERR(esw->qos.group0);
593 			goto err_group0;
594 		}
595 	}
596 	refcount_set(&esw->qos.refcnt, 1);
597 
598 	return 0;
599 
600 err_group0:
601 	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
602 						esw->qos.root_tsar_ix))
603 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
604 
605 	return err;
606 }
607 
esw_qos_destroy(struct mlx5_eswitch * esw)608 static void esw_qos_destroy(struct mlx5_eswitch *esw)
609 {
610 	int err;
611 
612 	if (esw->qos.group0)
613 		__esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
614 
615 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
616 						  SCHEDULING_HIERARCHY_E_SWITCH,
617 						  esw->qos.root_tsar_ix);
618 	if (err)
619 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
620 }
621 
esw_qos_get(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)622 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
623 {
624 	int err = 0;
625 
626 	lockdep_assert_held(&esw->state_lock);
627 
628 	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
629 		/* esw_qos_create() set refcount to 1 only on success.
630 		 * No need to decrement on failure.
631 		 */
632 		err = esw_qos_create(esw, extack);
633 	}
634 
635 	return err;
636 }
637 
esw_qos_put(struct mlx5_eswitch * esw)638 static void esw_qos_put(struct mlx5_eswitch *esw)
639 {
640 	lockdep_assert_held(&esw->state_lock);
641 	if (refcount_dec_and_test(&esw->qos.refcnt))
642 		esw_qos_destroy(esw);
643 }
644 
/* Enable QoS on @vport: take a QoS reference, attach the vport to
 * esw->qos.group0 and create its scheduling element with @max_rate and
 * @bw_share. Must be called with esw->state_lock held.
 *
 * Returns 0 when QoS already is enabled on the vport or was enabled
 * successfully. On failure the reference is dropped again, the vport is
 * detached from the group and the error is returned.
 */
static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (vport->qos.enabled)
		return 0;

	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	vport->qos.group = esw->qos.group0;

	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
	if (err)
		goto err_out;

	vport->qos.enabled = true;
	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);

	return 0;

err_out:
	/* esw_qos_put() below may free group0; don't keep a pointer to it in
	 * a vport that stays QoS-disabled.
	 */
	vport->qos.group = NULL;
	esw_qos_put(esw);

	return err;
}
674 
mlx5_esw_qos_vport_disable(struct mlx5_eswitch * esw,struct mlx5_vport * vport)675 void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
676 {
677 	int err;
678 
679 	lockdep_assert_held(&esw->state_lock);
680 	if (!vport->qos.enabled)
681 		return;
682 	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
683 	     "Disabling QoS on port before detaching it from group");
684 
685 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
686 						  SCHEDULING_HIERARCHY_E_SWITCH,
687 						  vport->qos.esw_tsar_ix);
688 	if (err)
689 		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
690 			 vport->vport, err);
691 
692 	memset(&vport->qos, 0, sizeof(vport->qos));
693 	trace_mlx5_esw_vport_qos_destroy(vport);
694 
695 	esw_qos_put(esw);
696 }
697 
/* Enable QoS on @vport if needed, then set its min rate followed by its max
 * rate. Must be called with esw->state_lock held. Returns the error of the
 * first step that fails; later steps are skipped.
 */
int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 min_rate)
{
	int ret;

	lockdep_assert_held(&esw->state_lock);

	ret = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
	if (ret)
		return ret;

	ret = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
	if (ret)
		return ret;

	return esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
}
714 
/* Set the max average bandwidth of vport @vport_num to @rate_mbps.
 *
 * Takes esw->state_lock. When QoS already is enabled on the vport only
 * max_average_bw of its scheduling element is modified; otherwise QoS is
 * enabled on the vport with @rate_mbps as its max rate.
 *
 * Returns the vport lookup error, or the result of the enable or modify.
 */
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	int ret;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	mutex_lock(&esw->state_lock);
	if (vport->qos.enabled) {
		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);

		ret = mlx5_modify_scheduling_element_cmd(esw->dev,
							 SCHEDULING_HIERARCHY_E_SWITCH,
							 ctx, vport->qos.esw_tsar_ix,
							 MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
	} else {
		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
		ret = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
	}
	mutex_unlock(&esw->state_lock);

	return ret;
}
744 
745 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
746 
747 /* Converts bytes per second value passed in a pointer into megabits per
748  * second, rewriting last. If converted rate exceed link speed or is not a
749  * fraction of Mbps - returns error.
750  */
esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev * mdev,const char * name,u64 * rate,struct netlink_ext_ack * extack)751 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
752 					u64 *rate, struct netlink_ext_ack *extack)
753 {
754 	u32 link_speed_max, remainder;
755 	u64 value;
756 	int err;
757 
758 	err = mlx5_port_max_linkspeed(mdev, &link_speed_max);
759 	if (err) {
760 		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
761 		return err;
762 	}
763 
764 	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
765 	if (remainder) {
766 		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
767 		       name, *rate);
768 		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
769 		return -EINVAL;
770 	}
771 
772 	if (value > link_speed_max) {
773 		pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
774 		       name, value, link_speed_max);
775 		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
776 		return -EINVAL;
777 	}
778 
779 	*rate = value;
780 	return 0;
781 }
782 
783 /* Eswitch devlink rate API */
784 
/* devlink rate leaf callback: set the tx_share (min rate) of the vport
 * passed in @priv. @tx_share arrives in bytes per second and is converted
 * to Mbps first.
 *
 * Returns -EPERM when mlx5_esw_allowed() rejects the eswitch, a conversion
 * error, or the result of enabling vport QoS and setting the min rate under
 * esw->state_lock.
 */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!ret)
		ret = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
810 
/* devlink rate leaf callback: set the tx_max (max rate) of the vport passed
 * in @priv. @tx_max arrives in bytes per second and is converted to Mbps
 * first.
 *
 * Returns -EPERM when mlx5_esw_allowed() rejects the eswitch, a conversion
 * error, or the result of enabling vport QoS and setting the max rate under
 * esw->state_lock.
 */
int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!ret)
		ret = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
836 
/* devlink rate node callback: set the tx_share (min rate) of the rate group
 * passed in @priv. @tx_share arrives in bytes per second and is converted to
 * Mbps before the group is updated under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_min_rate(esw, priv, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
854 
/* devlink rate node callback: set the tx_max (max rate) of the rate group
 * passed in @priv. @tx_max arrives in bytes per second and is converted to
 * Mbps before the group is updated under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_max_rate(esw, priv, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
872 
mlx5_esw_devlink_rate_node_new(struct devlink_rate * rate_node,void ** priv,struct netlink_ext_ack * extack)873 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
874 				   struct netlink_ext_ack *extack)
875 {
876 	struct mlx5_esw_rate_group *group;
877 	struct mlx5_eswitch *esw;
878 	int err = 0;
879 
880 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
881 	if (IS_ERR(esw))
882 		return PTR_ERR(esw);
883 
884 	mutex_lock(&esw->state_lock);
885 	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
886 		NL_SET_ERR_MSG_MOD(extack,
887 				   "Rate node creation supported only in switchdev mode");
888 		err = -EOPNOTSUPP;
889 		goto unlock;
890 	}
891 
892 	group = esw_qos_create_rate_group(esw, extack);
893 	if (IS_ERR(group)) {
894 		err = PTR_ERR(group);
895 		goto unlock;
896 	}
897 
898 	*priv = group;
899 unlock:
900 	mutex_unlock(&esw->state_lock);
901 	return err;
902 }
903 
mlx5_esw_devlink_rate_node_del(struct devlink_rate * rate_node,void * priv,struct netlink_ext_ack * extack)904 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
905 				   struct netlink_ext_ack *extack)
906 {
907 	struct mlx5_esw_rate_group *group = priv;
908 	struct mlx5_eswitch *esw;
909 	int err;
910 
911 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
912 	if (IS_ERR(esw))
913 		return PTR_ERR(esw);
914 
915 	mutex_lock(&esw->state_lock);
916 	err = esw_qos_destroy_rate_group(esw, group, extack);
917 	mutex_unlock(&esw->state_lock);
918 	return err;
919 }
920 
mlx5_esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)921 int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
922 				    struct mlx5_vport *vport,
923 				    struct mlx5_esw_rate_group *group,
924 				    struct netlink_ext_ack *extack)
925 {
926 	int err = 0;
927 
928 	mutex_lock(&esw->state_lock);
929 	if (!vport->qos.enabled && !group)
930 		goto unlock;
931 
932 	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
933 	if (!err)
934 		err = esw_qos_vport_update_group(esw, vport, group, extack);
935 unlock:
936 	mutex_unlock(&esw->state_lock);
937 	return err;
938 }
939 
mlx5_esw_devlink_rate_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)940 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
941 				     struct devlink_rate *parent,
942 				     void *priv, void *parent_priv,
943 				     struct netlink_ext_ack *extack)
944 {
945 	struct mlx5_esw_rate_group *group;
946 	struct mlx5_vport *vport = priv;
947 
948 	if (!parent)
949 		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
950 						       vport, NULL, extack);
951 
952 	group = parent_priv;
953 	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
954 }
955