// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "esw/qos.h"
#include "en/port.h"

/* Minimum BW share value supported by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
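
/* Worked example (illustrative numbers, not from the driver): with
 * divider = 50 and limit = fw_max_bw_share = 100, a 2500 Mbps minimum
 * rate maps to
 *
 *	MLX5_RATE_TO_BW_SHARE(2500, 50, 100)
 *		= min(max(DIV_ROUND_UP(2500, 50), 1), 100)
 *		= min(max(50, 1), 100) = 50
 *
 * i.e. bw_share is a weight in the 1..limit range, proportional to the
 * requested minimum rate.
 */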

static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	void *vport_elem;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (!vport->qos.enabled)
		return -EIO;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
				  element_attributes);
	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	err = mlx5_modify_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 vport->qos.esw_tsar_ix,
						 bitmask);
	if (err) {
		esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
		return err;
	}

	return 0;
}

static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_vport *evport;
	u32 max_guarantee = 0;
	unsigned long i;

	mlx5_esw_for_each_vport(esw, i, evport) {
		if (!evport->enabled || evport->qos.min_rate < max_guarantee)
			continue;
		max_guarantee = evport->qos.min_rate;
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
	return 0;
}
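
/* Illustrative example (hypothetical capability values): if
 * max_tsar_bw_share is 100 and the largest configured min_rate across
 * enabled vports is 5000 Mbps, the divider is 5000 / 100 = 50, i.e. the
 * scale that maps the largest guarantee onto the maximum HW bw_share.
 */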

static int
esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 divider = calculate_vports_min_rate_divider(esw);
	struct mlx5_vport *evport;
	u32 vport_max_rate;
	u32 vport_min_rate;
	unsigned long i;
	u32 bw_share;
	int err;

	mlx5_esw_for_each_vport(esw, i, evport) {
		if (!evport->enabled)
			continue;
		vport_min_rate = evport->qos.min_rate;
		vport_max_rate = evport->qos.max_rate;
		bw_share = 0;

		if (divider)
			bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
							 divider,
							 fw_max_bw_share);

		if (bw_share == evport->qos.bw_share)
			continue;

		err = esw_qos_vport_config(esw, evport, vport_max_rate, bw_share, extack);
		if (!err)
			evport->qos.bw_share = bw_share;
		else
			return err;
	}

	return 0;
}
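
/* Continuing the example above: with divider = 50 and
 * fw_max_bw_share = 100, vports with min_rate 5000 and 2500 Mbps get
 * bw_share 100 and 50 respectively, so the DWRR weights stay
 * proportional to the configured guarantees after every min_rate
 * change.
 */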

int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
				    struct mlx5_vport *evport,
				    u32 min_rate,
				    struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share, previous_min_rate;
	bool min_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
				fw_max_bw_share >= MLX5_MIN_BW_SHARE;
	if (min_rate && !min_rate_supported)
		return -EOPNOTSUPP;
	if (min_rate == evport->qos.min_rate)
		return 0;

	previous_min_rate = evport->qos.min_rate;
	evport->qos.min_rate = min_rate;
	err = esw_qos_normalize_vports_min_rate(esw, extack);
	if (err)
		evport->qos.min_rate = previous_min_rate;

	return err;
}

int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
				    struct mlx5_vport *evport,
				    u32 max_rate,
				    struct netlink_ext_ack *extack)
{
	bool max_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);

	if (max_rate && !max_rate_supported)
		return -EOPNOTSUPP;
	if (max_rate == evport->qos.max_rate)
		return 0;

	err = esw_qos_vport_config(esw, evport, max_rate, evport->qos.bw_share, extack);
	if (!err)
		evport->qos.max_rate = max_rate;

	return err;
}

static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
{
	switch (type) {
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_TASR;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
	}
	return false;
}
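
/* Note: esw_element_type is a capability bitmask, so a device reporting
 * e.g. TASR | VPORT supports creating both TSAR and vport scheduling
 * elements. (ELEMENT_TYPE_CAP_MASK_TASR is the spelling used by the
 * firmware interface definitions for the TSAR capability bit.)
 */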

void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	__be32 *attr;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return;

	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
		return;

	if (esw->qos.enabled)
		return;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 tsar_ctx,
						 &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create TSAR failed (%d)\n", err);
		return;
	}

	esw->qos.enabled = true;
}

void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	if (!esw->qos.enabled)
		return;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);

	esw->qos.enabled = false;
}
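
/* Expected call order (a sketch of how callers are assumed to use this
 * API, derived from the checks above): mlx5_esw_qos_create() builds the
 * root DWRR TSAR once per eswitch; mlx5_esw_qos_vport_enable() then
 * attaches one vport element per vport under that root; per-vport rates
 * are adjusted via the set_vport_{min,max}_rate helpers; finally the
 * vport elements are removed by mlx5_esw_qos_vport_disable() before
 * mlx5_esw_qos_destroy() tears down the root TSAR.
 */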

int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
			      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	void *vport_elem;
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (!esw->qos.enabled)
		return 0;

	if (vport->qos.enabled)
		return -EEXIST;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 &vport->qos.esw_tsar_ix);
	if (err)
		esw_warn(dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
	else
		vport->qos.enabled = true;

	return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (!esw->qos.enabled || !vport->qos.enabled)
		return;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);

	vport->qos.enabled = false;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	u32 bitmask;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (!vport->qos.enabled)
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
	bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;

	return mlx5_modify_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  ctx,
						  vport->qos.esw_tsar_ix,
						  bitmask);
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
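
/* Worked example: devlink passes rates in bytes per second, so a 1 Gbps
 * request arrives as 125,000,000 Bps and converts to
 * 125,000,000 / 125,000 = 1000 Mbps with no remainder; a value such as
 * 125,001,000 Bps leaves a remainder and is rejected below.
 */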

/* Converts the bytes-per-second value passed in *rate into megabits per
 * second, rewriting *rate on success. Returns an error if the converted
 * rate exceeds the link speed or is not a whole number of Mbps.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 value;
	int err;

	err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
		return err;
	}

	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
		return -EINVAL;
	}

	*rate = value;
	return 0;
}

/* Eswitch devlink rate API */

int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}
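
/* A minimal sketch of how these callbacks are assumed to be wired up in
 * the driver's devlink ops (illustrative snippet; the actual hookup
 * lives elsewhere in the driver):
 *
 *	static const struct devlink_ops mlx5_devlink_ops = {
 *		...
 *		.rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
 *		.rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
 *	};
 *
 * which lets userspace drive them with, e.g.:
 *
 *	devlink port function rate set pci/0000:03:00.0/1 tx_share 100mbit
 */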