xref: /openbmc/linux/drivers/infiniband/hw/mlx5/cong.c (revision f97769fd)
1 /*
2  * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/debugfs.h>
34 
35 #include "mlx5_ib.h"
36 #include "cmd.h"
37 
/*
 * Congestion-control protocol node a parameter belongs to.  The value is
 * passed to the query command and written to the cong_protocol field of the
 * modify command.  (RP/NP presumably stand for reaction point / notification
 * point -- confirm against the device specification.)
 */
enum mlx5_ib_cong_node_type {
	MLX5_IB_RROCE_ECN_RP	= 1,
	MLX5_IB_RROCE_ECN_NP	= 2,
};
42 
/*
 * debugfs file names, indexed by congestion parameter offset: entry i is the
 * name of the file created for parameter offset i.
 */
static const char * const mlx5_ib_dbg_cc_name[] = {
	/* RP node parameters */
	"rp_clamp_tgt_rate",
	"rp_clamp_tgt_rate_ati",
	"rp_time_reset",
	"rp_byte_reset",
	"rp_threshold",
	"rp_ai_rate",
	"rp_max_rate",
	"rp_hai_rate",
	"rp_min_dec_fac",
	"rp_min_rate",
	"rp_rate_to_set_on_first_cnp",
	"rp_dce_tcp_g",
	"rp_dce_tcp_rtt",
	"rp_rate_reduce_monitor_period",
	"rp_initial_alpha_value",
	"rp_gd",

	/* NP node parameters */
	"np_min_time_between_cnps",
	"np_cnp_dscp",
	"np_cnp_prio_mode",
	"np_cnp_prio",
};
65 
/*
 * Field-select bits for the RP node parameters.  The matching bit is OR-ed
 * into the attribute mask whenever the corresponding parameter is modified.
 * Note that the bit order is not the debugfs index order: MAX_RATE is bit 6
 * and AI_RATE is bit 7.
 */
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR			BIT(1)
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR		BIT(2)
#define MLX5_IB_RP_TIME_RESET_ATTR			BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR			BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR			BIT(5)
#define MLX5_IB_RP_MAX_RATE_ATTR			BIT(6)
#define MLX5_IB_RP_AI_RATE_ATTR				BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR			BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR			BIT(9)
#define MLX5_IB_RP_MIN_RATE_ATTR			BIT(10)
#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR	BIT(11)
#define MLX5_IB_RP_DCE_TCP_G_ATTR			BIT(12)
#define MLX5_IB_RP_DCE_TCP_RTT_ATTR			BIT(13)
#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR	BIT(14)
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR		BIT(15)
#define MLX5_IB_RP_GD_ATTR				BIT(16)
82 
/*
 * Field-select bits for the NP node parameters.  There is no separate bit
 * for the CNP priority: modifying it selects the priority-mode bit.
 */
#define MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR		BIT(2)
#define MLX5_IB_NP_CNP_DSCP_ATTR			BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR			BIT(4)
86 
87 static enum mlx5_ib_cong_node_type
88 mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
89 {
90 	if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
91 	    param_offset <= MLX5_IB_DBG_CC_RP_GD)
92 		return MLX5_IB_RROCE_ECN_RP;
93 	else
94 		return MLX5_IB_RROCE_ECN_NP;
95 }
96 
97 static u32 mlx5_get_cc_param_val(void *field, int offset)
98 {
99 	switch (offset) {
100 	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
101 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
102 				clamp_tgt_rate);
103 	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
104 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
105 				clamp_tgt_rate_after_time_inc);
106 	case MLX5_IB_DBG_CC_RP_TIME_RESET:
107 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
108 				rpg_time_reset);
109 	case MLX5_IB_DBG_CC_RP_BYTE_RESET:
110 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
111 				rpg_byte_reset);
112 	case MLX5_IB_DBG_CC_RP_THRESHOLD:
113 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
114 				rpg_threshold);
115 	case MLX5_IB_DBG_CC_RP_AI_RATE:
116 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
117 				rpg_ai_rate);
118 	case MLX5_IB_DBG_CC_RP_MAX_RATE:
119 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
120 				rpg_max_rate);
121 	case MLX5_IB_DBG_CC_RP_HAI_RATE:
122 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
123 				rpg_hai_rate);
124 	case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
125 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
126 				rpg_min_dec_fac);
127 	case MLX5_IB_DBG_CC_RP_MIN_RATE:
128 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
129 				rpg_min_rate);
130 	case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
131 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
132 				rate_to_set_on_first_cnp);
133 	case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
134 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
135 				dce_tcp_g);
136 	case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
137 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
138 				dce_tcp_rtt);
139 	case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
140 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
141 				rate_reduce_monitor_period);
142 	case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
143 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
144 				initial_alpha_value);
145 	case MLX5_IB_DBG_CC_RP_GD:
146 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
147 				rpg_gd);
148 	case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
149 		return MLX5_GET(cong_control_r_roce_ecn_np, field,
150 				min_time_between_cnps);
151 	case MLX5_IB_DBG_CC_NP_CNP_DSCP:
152 		return MLX5_GET(cong_control_r_roce_ecn_np, field,
153 				cnp_dscp);
154 	case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
155 		return MLX5_GET(cong_control_r_roce_ecn_np, field,
156 				cnp_prio_mode);
157 	case MLX5_IB_DBG_CC_NP_CNP_PRIO:
158 		return MLX5_GET(cong_control_r_roce_ecn_np, field,
159 				cnp_802p_prio);
160 	default:
161 		return 0;
162 	}
163 }
164 
165 static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
166 					  u32 var, u32 *attr_mask)
167 {
168 	switch (offset) {
169 	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
170 		*attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
171 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
172 			 clamp_tgt_rate, var);
173 		break;
174 	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
175 		*attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
176 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
177 			 clamp_tgt_rate_after_time_inc, var);
178 		break;
179 	case MLX5_IB_DBG_CC_RP_TIME_RESET:
180 		*attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
181 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
182 			 rpg_time_reset, var);
183 		break;
184 	case MLX5_IB_DBG_CC_RP_BYTE_RESET:
185 		*attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
186 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
187 			 rpg_byte_reset, var);
188 		break;
189 	case MLX5_IB_DBG_CC_RP_THRESHOLD:
190 		*attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
191 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
192 			 rpg_threshold, var);
193 		break;
194 	case MLX5_IB_DBG_CC_RP_AI_RATE:
195 		*attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
196 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
197 			 rpg_ai_rate, var);
198 		break;
199 	case MLX5_IB_DBG_CC_RP_MAX_RATE:
200 		*attr_mask |= MLX5_IB_RP_MAX_RATE_ATTR;
201 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
202 			 rpg_max_rate, var);
203 		break;
204 	case MLX5_IB_DBG_CC_RP_HAI_RATE:
205 		*attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
206 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
207 			 rpg_hai_rate, var);
208 		break;
209 	case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
210 		*attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
211 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
212 			 rpg_min_dec_fac, var);
213 		break;
214 	case MLX5_IB_DBG_CC_RP_MIN_RATE:
215 		*attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
216 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
217 			 rpg_min_rate, var);
218 		break;
219 	case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
220 		*attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
221 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
222 			 rate_to_set_on_first_cnp, var);
223 		break;
224 	case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
225 		*attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
226 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
227 			 dce_tcp_g, var);
228 		break;
229 	case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
230 		*attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
231 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
232 			 dce_tcp_rtt, var);
233 		break;
234 	case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
235 		*attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
236 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
237 			 rate_reduce_monitor_period, var);
238 		break;
239 	case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
240 		*attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
241 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
242 			 initial_alpha_value, var);
243 		break;
244 	case MLX5_IB_DBG_CC_RP_GD:
245 		*attr_mask |= MLX5_IB_RP_GD_ATTR;
246 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
247 			 rpg_gd, var);
248 		break;
249 	case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
250 		*attr_mask |= MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR;
251 		MLX5_SET(cong_control_r_roce_ecn_np, field,
252 			 min_time_between_cnps, var);
253 		break;
254 	case MLX5_IB_DBG_CC_NP_CNP_DSCP:
255 		*attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
256 		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
257 		break;
258 	case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
259 		*attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
260 		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
261 		break;
262 	case MLX5_IB_DBG_CC_NP_CNP_PRIO:
263 		*attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
264 		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
265 		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
266 		break;
267 	}
268 }
269 
270 static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
271 				 int offset, u32 *var)
272 {
273 	int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
274 	void *out;
275 	void *field;
276 	int err;
277 	enum mlx5_ib_cong_node_type node;
278 	struct mlx5_core_dev *mdev;
279 
280 	/* Takes a 1-based port number */
281 	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
282 	if (!mdev)
283 		return -ENODEV;
284 
285 	out = kvzalloc(outlen, GFP_KERNEL);
286 	if (!out) {
287 		err = -ENOMEM;
288 		goto alloc_err;
289 	}
290 
291 	node = mlx5_ib_param_to_node(offset);
292 
293 	err = mlx5_cmd_query_cong_params(mdev, node, out);
294 	if (err)
295 		goto free;
296 
297 	field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
298 	*var = mlx5_get_cc_param_val(field, offset);
299 
300 free:
301 	kvfree(out);
302 alloc_err:
303 	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
304 	return err;
305 }
306 
307 static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
308 				 int offset, u32 var)
309 {
310 	int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
311 	void *in;
312 	void *field;
313 	enum mlx5_ib_cong_node_type node;
314 	struct mlx5_core_dev *mdev;
315 	u32 attr_mask = 0;
316 	int err;
317 
318 	/* Takes a 1-based port number */
319 	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
320 	if (!mdev)
321 		return -ENODEV;
322 
323 	in = kvzalloc(inlen, GFP_KERNEL);
324 	if (!in) {
325 		err = -ENOMEM;
326 		goto alloc_err;
327 	}
328 
329 	MLX5_SET(modify_cong_params_in, in, opcode,
330 		 MLX5_CMD_OP_MODIFY_CONG_PARAMS);
331 
332 	node = mlx5_ib_param_to_node(offset);
333 	MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
334 
335 	field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
336 	mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask);
337 
338 	field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
339 	MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
340 		 attr_mask);
341 
342 	err = mlx5_cmd_exec_in(dev->mdev, modify_cong_params, in);
343 	kvfree(in);
344 alloc_err:
345 	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
346 	return err;
347 }
348 
349 static ssize_t set_param(struct file *filp, const char __user *buf,
350 			 size_t count, loff_t *pos)
351 {
352 	struct mlx5_ib_dbg_param *param = filp->private_data;
353 	int offset = param->offset;
354 	char lbuf[11] = { };
355 	u32 var;
356 	int ret;
357 
358 	if (count > sizeof(lbuf))
359 		return -EINVAL;
360 
361 	if (copy_from_user(lbuf, buf, count))
362 		return -EFAULT;
363 
364 	lbuf[sizeof(lbuf) - 1] = '\0';
365 
366 	if (kstrtou32(lbuf, 0, &var))
367 		return -EINVAL;
368 
369 	ret = mlx5_ib_set_cc_params(param->dev, param->port_num, offset, var);
370 	return ret ? ret : count;
371 }
372 
373 static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
374 			 loff_t *pos)
375 {
376 	struct mlx5_ib_dbg_param *param = filp->private_data;
377 	int offset = param->offset;
378 	u32 var = 0;
379 	int ret;
380 	char lbuf[11];
381 
382 	ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
383 	if (ret)
384 		return ret;
385 
386 	ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var);
387 	if (ret < 0)
388 		return ret;
389 
390 	return simple_read_from_buffer(buf, count, pos, lbuf, ret);
391 }
392 
393 static const struct file_operations dbg_cc_fops = {
394 	.owner	= THIS_MODULE,
395 	.open	= simple_open,
396 	.write	= set_param,
397 	.read	= get_param,
398 };
399 
400 void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
401 {
402 	if (!mlx5_debugfs_root ||
403 	    !dev->port[port_num].dbg_cc_params ||
404 	    !dev->port[port_num].dbg_cc_params->root)
405 		return;
406 
407 	debugfs_remove_recursive(dev->port[port_num].dbg_cc_params->root);
408 	kfree(dev->port[port_num].dbg_cc_params);
409 	dev->port[port_num].dbg_cc_params = NULL;
410 }
411 
412 void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
413 {
414 	struct mlx5_ib_dbg_cc_params *dbg_cc_params;
415 	struct mlx5_core_dev *mdev;
416 	int i;
417 
418 	if (!mlx5_debugfs_root)
419 		return;
420 
421 	/* Takes a 1-based port number */
422 	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
423 	if (!mdev)
424 		return;
425 
426 	if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
427 	    !MLX5_CAP_GEN(mdev, cc_modify_allowed))
428 		goto put_mdev;
429 
430 	dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
431 	if (!dbg_cc_params)
432 		goto err;
433 
434 	dev->port[port_num].dbg_cc_params = dbg_cc_params;
435 
436 	dbg_cc_params->root = debugfs_create_dir("cc_params",
437 						 mdev->priv.dbg_root);
438 
439 	for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
440 		dbg_cc_params->params[i].offset = i;
441 		dbg_cc_params->params[i].dev = dev;
442 		dbg_cc_params->params[i].port_num = port_num;
443 		dbg_cc_params->params[i].dentry =
444 			debugfs_create_file(mlx5_ib_dbg_cc_name[i],
445 					    0600, dbg_cc_params->root,
446 					    &dbg_cc_params->params[i],
447 					    &dbg_cc_fops);
448 	}
449 
450 put_mdev:
451 	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
452 	return;
453 
454 err:
455 	mlx5_ib_warn(dev, "cong debugfs failure\n");
456 	mlx5_ib_cleanup_cong_debugfs(dev, port_num);
457 	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
458 
459 	/*
460 	 * We don't want to fail driver if debugfs failed to initialize,
461 	 * so we are not forwarding error to the user.
462 	 */
463 	return;
464 }
465