xref: /openbmc/linux/drivers/infiniband/hw/mlx5/cong.c (revision c4c3c32d)
1 /*
2  * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/debugfs.h>
34 
35 #include "mlx5_ib.h"
36 #include "cmd.h"
37 
/*
 * Congestion-control node selectors.
 *
 * mlx5_ib_param_to_node() maps each debugfs parameter to one of these
 * values; the result is passed to mlx5_cmd_query_cong_params() on reads
 * and written to the cong_protocol field of modify_cong_params_in on
 * writes.
 */
38 enum mlx5_ib_cong_node_type {
39 	MLX5_IB_RROCE_ECN_RP = 1,
40 	MLX5_IB_RROCE_ECN_NP = 2,
41 	MLX5_IB_RROCE_GENERAL = 3,
42 };
43 
/*
 * debugfs file names, one per congestion-control parameter.
 *
 * mlx5_ib_init_cong_debugfs() indexes this table with the same value it
 * stores as the parameter's offset, and that offset is later matched
 * against the MLX5_IB_DBG_CC_* cases when reading/writing the parameter.
 * The entry order therefore has to follow the MLX5_IB_DBG_CC_* numbering,
 * and the table needs MLX5_IB_DBG_CC_MAX entries.
 */
44 static const char * const mlx5_ib_dbg_cc_name[] = {
45 	"rp_clamp_tgt_rate",
46 	"rp_clamp_tgt_rate_ati",
47 	"rp_time_reset",
48 	"rp_byte_reset",
49 	"rp_threshold",
50 	"rp_ai_rate",
51 	"rp_max_rate",
52 	"rp_hai_rate",
53 	"rp_min_dec_fac",
54 	"rp_min_rate",
55 	"rp_rate_to_set_on_first_cnp",
56 	"rp_dce_tcp_g",
57 	"rp_dce_tcp_rtt",
58 	"rp_rate_reduce_monitor_period",
59 	"rp_initial_alpha_value",
60 	"rp_gd",
61 	"np_min_time_between_cnps",
62 	"np_cnp_dscp",
63 	"np_cnp_prio_mode",
64 	"np_cnp_prio",
65 	"rtt_resp_dscp_valid",
66 	"rtt_resp_dscp",
67 };
68 
/*
 * Field-select bits for the RP (MLX5_IB_RROCE_ECN_RP) parameters.
 * mlx5_ib_set_cc_param_mask_val() ORs the matching bit into the attribute
 * mask, which mlx5_ib_set_cc_params() then writes into the field_select
 * area of the modify command.
 *
 * Note that MAX_RATE is bit 6 and AI_RATE is bit 7, i.e. the reverse of
 * the order in which the two appear in mlx5_ib_dbg_cc_name[].
 */
69 #define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR			BIT(1)
70 #define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR		BIT(2)
71 #define MLX5_IB_RP_TIME_RESET_ATTR			BIT(3)
72 #define MLX5_IB_RP_BYTE_RESET_ATTR			BIT(4)
73 #define MLX5_IB_RP_THRESHOLD_ATTR			BIT(5)
74 #define MLX5_IB_RP_MAX_RATE_ATTR			BIT(6)
75 #define MLX5_IB_RP_AI_RATE_ATTR				BIT(7)
76 #define MLX5_IB_RP_HAI_RATE_ATTR			BIT(8)
77 #define MLX5_IB_RP_MIN_DEC_FAC_ATTR			BIT(9)
78 #define MLX5_IB_RP_MIN_RATE_ATTR			BIT(10)
79 #define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR	BIT(11)
80 #define MLX5_IB_RP_DCE_TCP_G_ATTR			BIT(12)
81 #define MLX5_IB_RP_DCE_TCP_RTT_ATTR			BIT(13)
82 #define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR	BIT(14)
83 #define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR		BIT(15)
84 #define MLX5_IB_RP_GD_ATTR				BIT(16)
85 
/*
 * Field-select bits for the NP (MLX5_IB_RROCE_ECN_NP) parameters.
 * There is no dedicated bit for cnp_802p_prio here:
 * mlx5_ib_set_cc_param_mask_val() selects it through
 * MLX5_IB_NP_CNP_PRIO_MODE_ATTR together with cnp_prio_mode = 0.
 */
86 #define MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR		BIT(2)
87 #define MLX5_IB_NP_CNP_DSCP_ATTR			BIT(3)
88 #define MLX5_IB_NP_CNP_PRIO_MODE_ATTR			BIT(4)
89 
/*
 * Field-select bit for the GENERAL (MLX5_IB_RROCE_GENERAL) parameters;
 * used for both rtt_resp_dscp_valid and rtt_resp_dscp.
 */
90 #define MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR		BIT(0)
91 
92 static enum mlx5_ib_cong_node_type
93 mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
94 {
95 	if (param_offset <= MLX5_IB_DBG_CC_RP_GD)
96 		return MLX5_IB_RROCE_ECN_RP;
97 
98 	if (param_offset <= MLX5_IB_DBG_CC_NP_CNP_PRIO)
99 		return MLX5_IB_RROCE_ECN_NP;
100 
101 	return MLX5_IB_RROCE_GENERAL;
102 }
103 
104 static u32 mlx5_get_cc_param_val(void *field, int offset)
105 {
106 	switch (offset) {
107 	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
108 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
109 				clamp_tgt_rate);
110 	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
111 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
112 				clamp_tgt_rate_after_time_inc);
113 	case MLX5_IB_DBG_CC_RP_TIME_RESET:
114 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
115 				rpg_time_reset);
116 	case MLX5_IB_DBG_CC_RP_BYTE_RESET:
117 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
118 				rpg_byte_reset);
119 	case MLX5_IB_DBG_CC_RP_THRESHOLD:
120 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
121 				rpg_threshold);
122 	case MLX5_IB_DBG_CC_RP_AI_RATE:
123 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
124 				rpg_ai_rate);
125 	case MLX5_IB_DBG_CC_RP_MAX_RATE:
126 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
127 				rpg_max_rate);
128 	case MLX5_IB_DBG_CC_RP_HAI_RATE:
129 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
130 				rpg_hai_rate);
131 	case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
132 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
133 				rpg_min_dec_fac);
134 	case MLX5_IB_DBG_CC_RP_MIN_RATE:
135 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
136 				rpg_min_rate);
137 	case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
138 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
139 				rate_to_set_on_first_cnp);
140 	case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
141 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
142 				dce_tcp_g);
143 	case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
144 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
145 				dce_tcp_rtt);
146 	case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
147 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
148 				rate_reduce_monitor_period);
149 	case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
150 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
151 				initial_alpha_value);
152 	case MLX5_IB_DBG_CC_RP_GD:
153 		return MLX5_GET(cong_control_r_roce_ecn_rp, field,
154 				rpg_gd);
155 	case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
156 		return MLX5_GET(cong_control_r_roce_ecn_np, field,
157 				min_time_between_cnps);
158 	case MLX5_IB_DBG_CC_NP_CNP_DSCP:
159 		return MLX5_GET(cong_control_r_roce_ecn_np, field,
160 				cnp_dscp);
161 	case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
162 		return MLX5_GET(cong_control_r_roce_ecn_np, field,
163 				cnp_prio_mode);
164 	case MLX5_IB_DBG_CC_NP_CNP_PRIO:
165 		return MLX5_GET(cong_control_r_roce_ecn_np, field,
166 				cnp_802p_prio);
167 	case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID:
168 		return MLX5_GET(cong_control_r_roce_general, field,
169 				rtt_resp_dscp_valid);
170 	case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP:
171 		return MLX5_GET(cong_control_r_roce_general, field,
172 				rtt_resp_dscp);
173 	default:
174 		return 0;
175 	}
176 }
177 
178 static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
179 					  u32 var, u32 *attr_mask)
180 {
181 	switch (offset) {
182 	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
183 		*attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
184 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
185 			 clamp_tgt_rate, var);
186 		break;
187 	case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
188 		*attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
189 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
190 			 clamp_tgt_rate_after_time_inc, var);
191 		break;
192 	case MLX5_IB_DBG_CC_RP_TIME_RESET:
193 		*attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
194 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
195 			 rpg_time_reset, var);
196 		break;
197 	case MLX5_IB_DBG_CC_RP_BYTE_RESET:
198 		*attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
199 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
200 			 rpg_byte_reset, var);
201 		break;
202 	case MLX5_IB_DBG_CC_RP_THRESHOLD:
203 		*attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
204 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
205 			 rpg_threshold, var);
206 		break;
207 	case MLX5_IB_DBG_CC_RP_AI_RATE:
208 		*attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
209 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
210 			 rpg_ai_rate, var);
211 		break;
212 	case MLX5_IB_DBG_CC_RP_MAX_RATE:
213 		*attr_mask |= MLX5_IB_RP_MAX_RATE_ATTR;
214 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
215 			 rpg_max_rate, var);
216 		break;
217 	case MLX5_IB_DBG_CC_RP_HAI_RATE:
218 		*attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
219 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
220 			 rpg_hai_rate, var);
221 		break;
222 	case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
223 		*attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
224 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
225 			 rpg_min_dec_fac, var);
226 		break;
227 	case MLX5_IB_DBG_CC_RP_MIN_RATE:
228 		*attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
229 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
230 			 rpg_min_rate, var);
231 		break;
232 	case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
233 		*attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
234 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
235 			 rate_to_set_on_first_cnp, var);
236 		break;
237 	case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
238 		*attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
239 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
240 			 dce_tcp_g, var);
241 		break;
242 	case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
243 		*attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
244 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
245 			 dce_tcp_rtt, var);
246 		break;
247 	case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
248 		*attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
249 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
250 			 rate_reduce_monitor_period, var);
251 		break;
252 	case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
253 		*attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
254 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
255 			 initial_alpha_value, var);
256 		break;
257 	case MLX5_IB_DBG_CC_RP_GD:
258 		*attr_mask |= MLX5_IB_RP_GD_ATTR;
259 		MLX5_SET(cong_control_r_roce_ecn_rp, field,
260 			 rpg_gd, var);
261 		break;
262 	case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
263 		*attr_mask |= MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR;
264 		MLX5_SET(cong_control_r_roce_ecn_np, field,
265 			 min_time_between_cnps, var);
266 		break;
267 	case MLX5_IB_DBG_CC_NP_CNP_DSCP:
268 		*attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
269 		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
270 		break;
271 	case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
272 		*attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
273 		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
274 		break;
275 	case MLX5_IB_DBG_CC_NP_CNP_PRIO:
276 		*attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
277 		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
278 		MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
279 		break;
280 	case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID:
281 		*attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR;
282 		MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, var);
283 		break;
284 	case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP:
285 		*attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR;
286 		MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, 1);
287 		MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp, var);
288 		break;
289 	}
290 }
291 
292 static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
293 				 int offset, u32 *var)
294 {
295 	int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
296 	void *out;
297 	void *field;
298 	int err;
299 	enum mlx5_ib_cong_node_type node;
300 	struct mlx5_core_dev *mdev;
301 
302 	/* Takes a 1-based port number */
303 	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
304 	if (!mdev)
305 		return -ENODEV;
306 
307 	out = kvzalloc(outlen, GFP_KERNEL);
308 	if (!out) {
309 		err = -ENOMEM;
310 		goto alloc_err;
311 	}
312 
313 	node = mlx5_ib_param_to_node(offset);
314 
315 	err = mlx5_cmd_query_cong_params(mdev, node, out);
316 	if (err)
317 		goto free;
318 
319 	field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
320 	*var = mlx5_get_cc_param_val(field, offset);
321 
322 free:
323 	kvfree(out);
324 alloc_err:
325 	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
326 	return err;
327 }
328 
329 static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
330 				 int offset, u32 var)
331 {
332 	int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
333 	void *in;
334 	void *field;
335 	enum mlx5_ib_cong_node_type node;
336 	struct mlx5_core_dev *mdev;
337 	u32 attr_mask = 0;
338 	int err;
339 
340 	/* Takes a 1-based port number */
341 	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
342 	if (!mdev)
343 		return -ENODEV;
344 
345 	in = kvzalloc(inlen, GFP_KERNEL);
346 	if (!in) {
347 		err = -ENOMEM;
348 		goto alloc_err;
349 	}
350 
351 	MLX5_SET(modify_cong_params_in, in, opcode,
352 		 MLX5_CMD_OP_MODIFY_CONG_PARAMS);
353 
354 	node = mlx5_ib_param_to_node(offset);
355 	MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
356 
357 	field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
358 	mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask);
359 
360 	field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
361 	MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
362 		 attr_mask);
363 
364 	err = mlx5_cmd_exec_in(dev->mdev, modify_cong_params, in);
365 	kvfree(in);
366 alloc_err:
367 	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
368 	return err;
369 }
370 
371 static ssize_t set_param(struct file *filp, const char __user *buf,
372 			 size_t count, loff_t *pos)
373 {
374 	struct mlx5_ib_dbg_param *param = filp->private_data;
375 	int offset = param->offset;
376 	char lbuf[11] = { };
377 	u32 var;
378 	int ret;
379 
380 	if (count > sizeof(lbuf))
381 		return -EINVAL;
382 
383 	if (copy_from_user(lbuf, buf, count))
384 		return -EFAULT;
385 
386 	lbuf[sizeof(lbuf) - 1] = '\0';
387 
388 	if (kstrtou32(lbuf, 0, &var))
389 		return -EINVAL;
390 
391 	ret = mlx5_ib_set_cc_params(param->dev, param->port_num, offset, var);
392 	return ret ? ret : count;
393 }
394 
395 static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
396 			 loff_t *pos)
397 {
398 	struct mlx5_ib_dbg_param *param = filp->private_data;
399 	int offset = param->offset;
400 	u32 var = 0;
401 	int ret;
402 	char lbuf[11];
403 
404 	ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
405 	if (ret)
406 		return ret;
407 
408 	ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var);
409 	if (ret < 0)
410 		return ret;
411 
412 	return simple_read_from_buffer(buf, count, pos, lbuf, ret);
413 }
414 
/*
 * File operations shared by every congestion-control debugfs file created
 * in mlx5_ib_init_cong_debugfs(): reads go through get_param() and writes
 * through set_param(), both of which take the per-parameter descriptor
 * from filp->private_data.
 */
415 static const struct file_operations dbg_cc_fops = {
416 	.owner	= THIS_MODULE,
417 	.open	= simple_open,
418 	.write	= set_param,
419 	.read	= get_param,
420 };
421 
422 void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
423 {
424 	if (!mlx5_debugfs_root ||
425 	    !dev->port[port_num].dbg_cc_params ||
426 	    !dev->port[port_num].dbg_cc_params->root)
427 		return;
428 
429 	debugfs_remove_recursive(dev->port[port_num].dbg_cc_params->root);
430 	kfree(dev->port[port_num].dbg_cc_params);
431 	dev->port[port_num].dbg_cc_params = NULL;
432 }
433 
434 void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
435 {
436 	struct mlx5_ib_dbg_cc_params *dbg_cc_params;
437 	struct mlx5_core_dev *mdev;
438 	int i;
439 
440 	if (!mlx5_debugfs_root)
441 		return;
442 
443 	/* Takes a 1-based port number */
444 	mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
445 	if (!mdev)
446 		return;
447 
448 	if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
449 	    !MLX5_CAP_GEN(mdev, cc_modify_allowed))
450 		goto put_mdev;
451 
452 	dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
453 	if (!dbg_cc_params)
454 		goto err;
455 
456 	dev->port[port_num].dbg_cc_params = dbg_cc_params;
457 
458 	dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev));
459 
460 	for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
461 		dbg_cc_params->params[i].offset = i;
462 		dbg_cc_params->params[i].dev = dev;
463 		dbg_cc_params->params[i].port_num = port_num;
464 		dbg_cc_params->params[i].dentry =
465 			debugfs_create_file(mlx5_ib_dbg_cc_name[i],
466 					    0600, dbg_cc_params->root,
467 					    &dbg_cc_params->params[i],
468 					    &dbg_cc_fops);
469 	}
470 
471 put_mdev:
472 	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
473 	return;
474 
475 err:
476 	mlx5_ib_warn(dev, "cong debugfs failure\n");
477 	mlx5_ib_cleanup_cong_debugfs(dev, port_num);
478 	mlx5_ib_put_native_port_mdev(dev, port_num + 1);
479 
480 	/*
481 	 * We don't want to fail driver if debugfs failed to initialize,
482 	 * so we are not forwarding error to the user.
483 	 */
484 	return;
485 }
486