1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
4  */
5 
6 #include "mlx5_ib.h"
7 #include <linux/mlx5/eswitch.h>
8 #include "counters.h"
9 #include "ib_rep.h"
10 #include "qp.h"
11 
/*
 * One entry of a static counter table: the name a counter is exposed under
 * and the offset at which its value is found in the output layout it is read
 * from.
 */
struct mlx5_ib_counter {
	const char *name;	/* stringified layout field name */
	size_t offset;		/* value produced by MLX5_BYTE_OFF() for that field */
};
16 
/*
 * Build a struct mlx5_ib_counter initializer for a field of the
 * query_q_counter_out layout. The field name is stringified and reused as the
 * counter name.
 */
#define INIT_Q_COUNTER(_name)		\
	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
19 
/* Q counters that mlx5_ib_fill_counters() copies unconditionally. */
static const struct mlx5_ib_counter basic_q_cnts[] = {
	INIT_Q_COUNTER(rx_write_requests),
	INIT_Q_COUNTER(rx_read_requests),
	INIT_Q_COUNTER(rx_atomic_requests),
	INIT_Q_COUNTER(out_of_buffer),
};

/* Q counters added only when the out_of_seq_cnt general capability is set. */
static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
	INIT_Q_COUNTER(out_of_sequence),
};

/*
 * Q counters added only when the retransmission_q_counters general capability
 * is set.
 */
static const struct mlx5_ib_counter retrans_q_cnts[] = {
	INIT_Q_COUNTER(duplicate_request),
	INIT_Q_COUNTER(rnr_nak_retry_err),
	INIT_Q_COUNTER(packet_seq_err),
	INIT_Q_COUNTER(implied_nak_seq_err),
	INIT_Q_COUNTER(local_ack_timeout_err),
};
38 
/*
 * Build a struct mlx5_ib_counter initializer for a congestion counter. The
 * offset is that of the "<name>_high" field of query_cong_statistics_out
 * (presumably the upper half of a 64-bit value -- confirm against the layout).
 */
#define INIT_CONG_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}

/* Congestion counters, added only when cc_query_allowed is set. */
static const struct mlx5_ib_counter cong_cnts[] = {
	INIT_CONG_COUNTER(rp_cnp_ignored),
	INIT_CONG_COUNTER(rp_cnp_handled),
	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
	INIT_CONG_COUNTER(np_cnp_sent),
};
49 
/*
 * Q counters added only when the enhanced_error_q_counters general capability
 * is set.
 */
static const struct mlx5_ib_counter extended_err_cnts[] = {
	INIT_Q_COUNTER(resp_local_length_error),
	INIT_Q_COUNTER(resp_cqe_error),
	INIT_Q_COUNTER(req_cqe_error),
	INIT_Q_COUNTER(req_remote_invalid_request),
	INIT_Q_COUNTER(req_remote_access_errors),
	INIT_Q_COUNTER(resp_remote_access_errors),
	INIT_Q_COUNTER(resp_cqe_flush_error),
	INIT_Q_COUNTER(req_cqe_flush_error),
};

/* Q counters added only when the roce_accl general capability is set. */
static const struct mlx5_ib_counter roce_accl_cnts[] = {
	INIT_Q_COUNTER(roce_adp_retrans),
	INIT_Q_COUNTER(roce_adp_retrans_to),
	INIT_Q_COUNTER(roce_slow_restart),
	INIT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_Q_COUNTER(roce_slow_restart_trans),
};
68 
/*
 * Build a struct mlx5_ib_counter initializer for a counter of the PPCNT
 * register's ethernet extended counters group. The offset is that of the
 * "<name>_high" field; mlx5_ib_query_ext_ppcnt_counters() reads a 64-bit
 * big-endian value starting there.
 */
#define INIT_EXT_PPCNT_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
	MLX5_BYTE_OFF(ppcnt_reg, \
		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}

/*
 * PPCNT counters, added only when the rx_icrc_encapsulated_counter PCAM
 * feature is reported.
 */
static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
77 
78 static int mlx5_ib_read_counters(struct ib_counters *counters,
79 				 struct ib_counters_read_attr *read_attr,
80 				 struct uverbs_attr_bundle *attrs)
81 {
82 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
83 	struct mlx5_read_counters_attr mread_attr = {};
84 	struct mlx5_ib_flow_counters_desc *desc;
85 	int ret, i;
86 
87 	mutex_lock(&mcounters->mcntrs_mutex);
88 	if (mcounters->cntrs_max_index > read_attr->ncounters) {
89 		ret = -EINVAL;
90 		goto err_bound;
91 	}
92 
93 	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
94 				 GFP_KERNEL);
95 	if (!mread_attr.out) {
96 		ret = -ENOMEM;
97 		goto err_bound;
98 	}
99 
100 	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
101 	mread_attr.flags = read_attr->flags;
102 	ret = mcounters->read_counters(counters->device, &mread_attr);
103 	if (ret)
104 		goto err_read;
105 
106 	/* do the pass over the counters data array to assign according to the
107 	 * descriptions and indexing pairs
108 	 */
109 	desc = mcounters->counters_data;
110 	for (i = 0; i < mcounters->ncounters; i++)
111 		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
112 
113 err_read:
114 	kfree(mread_attr.out);
115 err_bound:
116 	mutex_unlock(&mcounters->mcntrs_mutex);
117 	return ret;
118 }
119 
120 static void mlx5_ib_destroy_counters(struct ib_counters *counters)
121 {
122 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
123 
124 	mlx5_ib_counters_clear_description(counters);
125 	if (mcounters->hw_cntrs_hndl)
126 		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
127 				mcounters->hw_cntrs_hndl);
128 }
129 
130 static int mlx5_ib_create_counters(struct ib_counters *counters,
131 				   struct uverbs_attr_bundle *attrs)
132 {
133 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
134 
135 	mutex_init(&mcounters->mcntrs_mutex);
136 	return 0;
137 }
138 
139 
140 static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
141 {
142 	return MLX5_ESWITCH_MANAGER(mdev) &&
143 	       mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
144 		       MLX5_ESWITCH_OFFLOADS;
145 }
146 
147 static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
148 						   u8 port_num)
149 {
150 	return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
151 						   &dev->port[port_num].cnts;
152 }
153 
154 /**
155  * mlx5_ib_get_counters_id - Returns counters id to use for device+port
156  * @dev:	Pointer to mlx5 IB device
157  * @port_num:	Zero based port number
158  *
159  * mlx5_ib_get_counters_id() Returns counters set id to use for given
160  * device port combination in switchdev and non switchdev mode of the
161  * parent device.
162  */
163 u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
164 {
165 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
166 
167 	return cnts->set_id;
168 }
169 
170 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
171 						    u8 port_num)
172 {
173 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
174 	const struct mlx5_ib_counters *cnts;
175 	bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
176 
177 	if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
178 		return NULL;
179 
180 	cnts = get_counters(dev, port_num - 1);
181 
182 	return rdma_alloc_hw_stats_struct(cnts->names,
183 					  cnts->num_q_counters +
184 					  cnts->num_cong_counters +
185 					  cnts->num_ext_ppcnt_counters,
186 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
187 }
188 
189 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
190 				    const struct mlx5_ib_counters *cnts,
191 				    struct rdma_hw_stats *stats,
192 				    u16 set_id)
193 {
194 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
195 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
196 	__be32 val;
197 	int ret, i;
198 
199 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
200 	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
201 	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
202 	if (ret)
203 		return ret;
204 
205 	for (i = 0; i < cnts->num_q_counters; i++) {
206 		val = *(__be32 *)((void *)out + cnts->offsets[i]);
207 		stats->value[i] = (u64)be32_to_cpu(val);
208 	}
209 
210 	return 0;
211 }
212 
213 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
214 					    const struct mlx5_ib_counters *cnts,
215 					    struct rdma_hw_stats *stats)
216 {
217 	int offset = cnts->num_q_counters + cnts->num_cong_counters;
218 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
219 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
220 	int ret, i;
221 	void *out;
222 
223 	out = kvzalloc(sz, GFP_KERNEL);
224 	if (!out)
225 		return -ENOMEM;
226 
227 	MLX5_SET(ppcnt_reg, in, local_port, 1);
228 	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
229 	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
230 				   0, 0);
231 	if (ret)
232 		goto free;
233 
234 	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
235 		stats->value[i + offset] =
236 			be64_to_cpup((__be64 *)(out +
237 				    cnts->offsets[i + offset]));
238 free:
239 	kvfree(out);
240 	return ret;
241 }
242 
243 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
244 				struct rdma_hw_stats *stats,
245 				u8 port_num, int index)
246 {
247 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
248 	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
249 	struct mlx5_core_dev *mdev;
250 	int ret, num_counters;
251 	u8 mdev_port_num;
252 
253 	if (!stats)
254 		return -EINVAL;
255 
256 	num_counters = cnts->num_q_counters +
257 		       cnts->num_cong_counters +
258 		       cnts->num_ext_ppcnt_counters;
259 
260 	/* q_counters are per IB device, query the master mdev */
261 	ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
262 	if (ret)
263 		return ret;
264 
265 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
266 		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
267 		if (ret)
268 			return ret;
269 	}
270 
271 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
272 		mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
273 						    &mdev_port_num);
274 		if (!mdev) {
275 			/* If port is not affiliated yet, its in down state
276 			 * which doesn't have any counters yet, so it would be
277 			 * zero. So no need to read from the HCA.
278 			 */
279 			goto done;
280 		}
281 		ret = mlx5_lag_query_cong_counters(dev->mdev,
282 						   stats->value +
283 						   cnts->num_q_counters,
284 						   cnts->num_cong_counters,
285 						   cnts->offsets +
286 						   cnts->num_q_counters);
287 
288 		mlx5_ib_put_native_port_mdev(dev, port_num);
289 		if (ret)
290 			return ret;
291 	}
292 
293 done:
294 	return num_counters;
295 }
296 
297 static struct rdma_hw_stats *
298 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
299 {
300 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
301 	const struct mlx5_ib_counters *cnts =
302 		get_counters(dev, counter->port - 1);
303 
304 	return rdma_alloc_hw_stats_struct(cnts->names,
305 					  cnts->num_q_counters +
306 					  cnts->num_cong_counters +
307 					  cnts->num_ext_ppcnt_counters,
308 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
309 }
310 
311 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
312 {
313 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
314 	const struct mlx5_ib_counters *cnts =
315 		get_counters(dev, counter->port - 1);
316 
317 	return mlx5_ib_query_q_counters(dev->mdev, cnts,
318 					counter->stats, counter->id);
319 }
320 
321 static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
322 {
323 	struct mlx5_ib_dev *dev = to_mdev(counter->device);
324 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
325 
326 	if (!counter->id)
327 		return 0;
328 
329 	MLX5_SET(dealloc_q_counter_in, in, opcode,
330 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
331 	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
332 	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
333 }
334 
335 static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
336 				   struct ib_qp *qp)
337 {
338 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
339 	int err;
340 
341 	if (!counter->id) {
342 		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
343 		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
344 
345 		MLX5_SET(alloc_q_counter_in, in, opcode,
346 			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
347 		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
348 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
349 		if (err)
350 			return err;
351 		counter->id =
352 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
353 	}
354 
355 	err = mlx5_ib_qp_set_counter(qp, counter);
356 	if (err)
357 		goto fail_set_counter;
358 
359 	return 0;
360 
361 fail_set_counter:
362 	mlx5_ib_counter_dealloc(counter);
363 	counter->id = 0;
364 
365 	return err;
366 }
367 
368 static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
369 {
370 	return mlx5_ib_qp_set_counter(qp, NULL);
371 }
372 
373 
374 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
375 				  const char **names,
376 				  size_t *offsets)
377 {
378 	int i;
379 	int j = 0;
380 
381 	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
382 		names[j] = basic_q_cnts[i].name;
383 		offsets[j] = basic_q_cnts[i].offset;
384 	}
385 
386 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
387 		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
388 			names[j] = out_of_seq_q_cnts[i].name;
389 			offsets[j] = out_of_seq_q_cnts[i].offset;
390 		}
391 	}
392 
393 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
394 		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
395 			names[j] = retrans_q_cnts[i].name;
396 			offsets[j] = retrans_q_cnts[i].offset;
397 		}
398 	}
399 
400 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
401 		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
402 			names[j] = extended_err_cnts[i].name;
403 			offsets[j] = extended_err_cnts[i].offset;
404 		}
405 	}
406 
407 	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
408 		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
409 			names[j] = roce_accl_cnts[i].name;
410 			offsets[j] = roce_accl_cnts[i].offset;
411 		}
412 	}
413 
414 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
415 		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
416 			names[j] = cong_cnts[i].name;
417 			offsets[j] = cong_cnts[i].offset;
418 		}
419 	}
420 
421 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
422 		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
423 			names[j] = ext_ppcnt_cnts[i].name;
424 			offsets[j] = ext_ppcnt_cnts[i].offset;
425 		}
426 	}
427 }
428 
429 
430 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
431 				    struct mlx5_ib_counters *cnts)
432 {
433 	u32 num_counters;
434 
435 	num_counters = ARRAY_SIZE(basic_q_cnts);
436 
437 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
438 		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
439 
440 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
441 		num_counters += ARRAY_SIZE(retrans_q_cnts);
442 
443 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
444 		num_counters += ARRAY_SIZE(extended_err_cnts);
445 
446 	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
447 		num_counters += ARRAY_SIZE(roce_accl_cnts);
448 
449 	cnts->num_q_counters = num_counters;
450 
451 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
452 		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
453 		num_counters += ARRAY_SIZE(cong_cnts);
454 	}
455 	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
456 		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
457 		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
458 	}
459 	cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
460 	if (!cnts->names)
461 		return -ENOMEM;
462 
463 	cnts->offsets = kcalloc(num_counters,
464 				sizeof(cnts->offsets), GFP_KERNEL);
465 	if (!cnts->offsets)
466 		goto err_names;
467 
468 	return 0;
469 
470 err_names:
471 	kfree(cnts->names);
472 	cnts->names = NULL;
473 	return -ENOMEM;
474 }
475 
476 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
477 {
478 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
479 	int num_cnt_ports;
480 	int i;
481 
482 	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
483 
484 	MLX5_SET(dealloc_q_counter_in, in, opcode,
485 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
486 
487 	for (i = 0; i < num_cnt_ports; i++) {
488 		if (dev->port[i].cnts.set_id) {
489 			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
490 				 dev->port[i].cnts.set_id);
491 			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
492 		}
493 		kfree(dev->port[i].cnts.names);
494 		kfree(dev->port[i].cnts.offsets);
495 	}
496 }
497 
498 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
499 {
500 	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
501 	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
502 	int num_cnt_ports;
503 	int err = 0;
504 	int i;
505 	bool is_shared;
506 
507 	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
508 	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
509 	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
510 
511 	for (i = 0; i < num_cnt_ports; i++) {
512 		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
513 		if (err)
514 			goto err_alloc;
515 
516 		mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
517 				      dev->port[i].cnts.offsets);
518 
519 		MLX5_SET(alloc_q_counter_in, in, uid,
520 			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
521 
522 		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
523 		if (err) {
524 			mlx5_ib_warn(dev,
525 				     "couldn't allocate queue counter for port %d, err %d\n",
526 				     i + 1, err);
527 			goto err_alloc;
528 		}
529 
530 		dev->port[i].cnts.set_id =
531 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
532 	}
533 	return 0;
534 
535 err_alloc:
536 	mlx5_ib_dealloc_counters(dev);
537 	return err;
538 }
539 
540 static int read_flow_counters(struct ib_device *ibdev,
541 			      struct mlx5_read_counters_attr *read_attr)
542 {
543 	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
544 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
545 
546 	return mlx5_fc_query(dev->mdev, fc,
547 			     &read_attr->out[IB_COUNTER_PACKETS],
548 			     &read_attr->out[IB_COUNTER_BYTES]);
549 }
550 
551 /* flow counters currently expose two counters packets and bytes */
552 #define FLOW_COUNTERS_NUM 2
553 static int counters_set_description(
554 	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
555 	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
556 {
557 	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
558 	u32 cntrs_max_index = 0;
559 	int i;
560 
561 	if (counters_type != MLX5_IB_COUNTERS_FLOW)
562 		return -EINVAL;
563 
564 	/* init the fields for the object */
565 	mcounters->type = counters_type;
566 	mcounters->read_counters = read_flow_counters;
567 	mcounters->counters_num = FLOW_COUNTERS_NUM;
568 	mcounters->ncounters = ncounters;
569 	/* each counter entry have both description and index pair */
570 	for (i = 0; i < ncounters; i++) {
571 		if (desc_data[i].description > IB_COUNTER_BYTES)
572 			return -EINVAL;
573 
574 		if (cntrs_max_index <= desc_data[i].index)
575 			cntrs_max_index = desc_data[i].index + 1;
576 	}
577 
578 	mutex_lock(&mcounters->mcntrs_mutex);
579 	mcounters->counters_data = desc_data;
580 	mcounters->cntrs_max_index = cntrs_max_index;
581 	mutex_unlock(&mcounters->mcntrs_mutex);
582 
583 	return 0;
584 }
585 
586 #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
587 int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
588 				   struct mlx5_ib_create_flow *ucmd)
589 {
590 	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
591 	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
592 	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
593 	bool hw_hndl = false;
594 	int ret = 0;
595 
596 	if (ucmd && ucmd->ncounters_data != 0) {
597 		cntrs_data = ucmd->data;
598 		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
599 			return -EINVAL;
600 
601 		desc_data = kcalloc(cntrs_data->ncounters,
602 				    sizeof(*desc_data),
603 				    GFP_KERNEL);
604 		if (!desc_data)
605 			return  -ENOMEM;
606 
607 		if (copy_from_user(desc_data,
608 				   u64_to_user_ptr(cntrs_data->counters_data),
609 				   sizeof(*desc_data) * cntrs_data->ncounters)) {
610 			ret = -EFAULT;
611 			goto free;
612 		}
613 	}
614 
615 	if (!mcounters->hw_cntrs_hndl) {
616 		mcounters->hw_cntrs_hndl = mlx5_fc_create(
617 			to_mdev(ibcounters->device)->mdev, false);
618 		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
619 			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
620 			goto free;
621 		}
622 		hw_hndl = true;
623 	}
624 
625 	if (desc_data) {
626 		/* counters already bound to at least one flow */
627 		if (mcounters->cntrs_max_index) {
628 			ret = -EINVAL;
629 			goto free_hndl;
630 		}
631 
632 		ret = counters_set_description(ibcounters,
633 					       MLX5_IB_COUNTERS_FLOW,
634 					       desc_data,
635 					       cntrs_data->ncounters);
636 		if (ret)
637 			goto free_hndl;
638 
639 	} else if (!mcounters->cntrs_max_index) {
640 		/* counters not bound yet, must have udata passed */
641 		ret = -EINVAL;
642 		goto free_hndl;
643 	}
644 
645 	return 0;
646 
647 free_hndl:
648 	if (hw_hndl) {
649 		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
650 				mcounters->hw_cntrs_hndl);
651 		mcounters->hw_cntrs_hndl = NULL;
652 	}
653 free:
654 	kfree(desc_data);
655 	return ret;
656 }
657 
658 void mlx5_ib_counters_clear_description(struct ib_counters *counters)
659 {
660 	struct mlx5_ib_mcounters *mcounters;
661 
662 	if (!counters || atomic_read(&counters->usecnt) != 1)
663 		return;
664 
665 	mcounters = to_mcounters(counters);
666 
667 	mutex_lock(&mcounters->mcntrs_mutex);
668 	kfree(mcounters->counters_data);
669 	mcounters->counters_data = NULL;
670 	mcounters->cntrs_max_index = 0;
671 	mutex_unlock(&mcounters->mcntrs_mutex);
672 }
673 
674 static const struct ib_device_ops hw_stats_ops = {
675 	.alloc_hw_stats = mlx5_ib_alloc_hw_stats,
676 	.get_hw_stats = mlx5_ib_get_hw_stats,
677 	.counter_bind_qp = mlx5_ib_counter_bind_qp,
678 	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
679 	.counter_dealloc = mlx5_ib_counter_dealloc,
680 	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
681 	.counter_update_stats = mlx5_ib_counter_update_stats,
682 };
683 
/*
 * Device ops for ib_counters objects. Installed unconditionally by
 * mlx5_ib_counters_init().
 */
static const struct ib_device_ops counters_ops = {
	.create_counters = mlx5_ib_create_counters,
	.destroy_counters = mlx5_ib_destroy_counters,
	.read_counters = mlx5_ib_read_counters,

	/* ib_counters objects are embedded in mlx5_ib_mcounters as ibcntrs */
	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};
691 
692 int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
693 {
694 	ib_set_device_ops(&dev->ib_dev, &counters_ops);
695 
696 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
697 		return 0;
698 
699 	ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
700 	return mlx5_ib_alloc_counters(dev);
701 }
702 
703 void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
704 {
705 	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
706 		return;
707 
708 	mlx5_ib_dealloc_counters(dev);
709 }
710