1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <linux/smp.h>
5 #include "dr_types.h"
6 
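/* The send ring uses a send queue of QUEUE_SIZE WQEs. A completion is
 * requested once every signal_th = QUEUE_SIZE / SIGNAL_PER_DIV_QUEUE WQEs,
 * and the CQ is drained once the number of pending WQEs reaches
 * TH_NUMS_TO_DRAIN * signal_th.
 */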
7 #define QUEUE_SIZE 128
8 #define SIGNAL_PER_DIV_QUEUE 16
9 #define TH_NUMS_TO_DRAIN 2
10 #define DR_SEND_INFO_POOL_SIZE 1000
11 
12 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
13 
14 struct dr_data_seg {
15 	u64 addr;
16 	u32 length;
17 	u32 lkey;
18 	unsigned int send_flags;
19 };
20 
21 enum send_info_type {
22 	WRITE_ICM = 0,
23 	GTA_ARG   = 1,
24 };
25 
26 struct postsend_info {
27 	enum send_info_type type;
28 	struct dr_data_seg write;
29 	struct dr_data_seg read;
30 	u64 remote_addr;
31 	u32 rkey;
32 };
33 
34 struct dr_qp_rtr_attr {
35 	struct mlx5dr_cmd_gid_attr dgid_attr;
36 	enum ib_mtu mtu;
37 	u32 qp_num;
38 	u16 port_num;
39 	u8 min_rnr_timer;
40 	u8 sgid_index;
41 	u16 udp_src_port;
42 	u8 fl:1;
43 };
44 
45 struct dr_qp_rts_attr {
46 	u8 timeout;
47 	u8 retry_cnt;
48 	u8 rnr_retry;
49 };
50 
51 struct dr_qp_init_attr {
52 	u32 cqn;
53 	u32 pdn;
54 	u32 max_send_wr;
55 	u32 max_send_sge;
56 	struct mlx5_uars_page *uar;
57 	u8 isolate_vl_tc:1;
58 };
59 
60 struct mlx5dr_send_info_pool_obj {
61 	struct mlx5dr_ste_send_info ste_send_info;
62 	struct mlx5dr_send_info_pool *pool;
63 	struct list_head list_node;
64 };
65 
66 struct mlx5dr_send_info_pool {
67 	struct list_head free_list;
68 };
69 
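/* ste_send_info objects are served from per-domain pools, one per NIC
 * direction (RX/TX). Each pool is a free list that is refilled in batches
 * of DR_SEND_INFO_POOL_SIZE entries when it runs empty, which avoids a
 * separate allocation for every STE update.
 */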
70 static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool)
71 {
72 	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
73 	int i;
74 
75 	for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) {
76 		pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL);
77 		if (!pool_obj)
78 			goto clean_pool;
79 
80 		pool_obj->pool = pool;
81 		list_add_tail(&pool_obj->list_node, &pool->free_list);
82 	}
83 
84 	return 0;
85 
86 clean_pool:
87 	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
88 		list_del(&pool_obj->list_node);
89 		kfree(pool_obj);
90 	}
91 
92 	return -ENOMEM;
93 }
94 
95 static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool)
96 {
97 	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
98 
99 	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
100 		list_del(&pool_obj->list_node);
101 		kfree(pool_obj);
102 	}
103 
104 	kfree(pool);
105 }
106 
107 void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn)
108 {
109 	dr_send_info_pool_destroy(dmn->send_info_pool_tx);
110 	dr_send_info_pool_destroy(dmn->send_info_pool_rx);
111 }
112 
113 static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void)
114 {
115 	struct mlx5dr_send_info_pool *pool;
116 	int ret;
117 
118 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
119 	if (!pool)
120 		return NULL;
121 
122 	INIT_LIST_HEAD(&pool->free_list);
123 
124 	ret = dr_send_info_pool_fill(pool);
125 	if (ret) {
126 		kfree(pool);
127 		return NULL;
128 	}
129 
130 	return pool;
131 }
132 
133 int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn)
134 {
135 	dmn->send_info_pool_rx = dr_send_info_pool_create();
136 	if (!dmn->send_info_pool_rx)
137 		return -ENOMEM;
138 
139 	dmn->send_info_pool_tx = dr_send_info_pool_create();
140 	if (!dmn->send_info_pool_tx) {
141 		dr_send_info_pool_destroy(dmn->send_info_pool_rx);
142 		return -ENOMEM;
143 	}
144 
145 	return 0;
146 }
147 
148 struct mlx5dr_ste_send_info
149 *mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn,
150 			enum mlx5dr_domain_nic_type nic_type)
151 {
152 	struct mlx5dr_send_info_pool_obj *pool_obj;
153 	struct mlx5dr_send_info_pool *pool;
154 	int ret;
155 
156 	pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx :
157 						   dmn->send_info_pool_tx;
158 
159 	if (unlikely(list_empty(&pool->free_list))) {
160 		ret = dr_send_info_pool_fill(pool);
161 		if (ret)
162 			return NULL;
163 	}
164 
165 	pool_obj = list_first_entry_or_null(&pool->free_list,
166 					    struct mlx5dr_send_info_pool_obj,
167 					    list_node);
168 
169 	if (likely(pool_obj)) {
170 		list_del_init(&pool_obj->list_node);
171 	} else {
172 		WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool");
173 		return NULL;
174 	}
175 
176 	return &pool_obj->ste_send_info;
177 }
178 
179 void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info)
180 {
181 	struct mlx5dr_send_info_pool_obj *pool_obj;
182 
183 	pool_obj = container_of(ste_send_info,
184 				struct mlx5dr_send_info_pool_obj,
185 				ste_send_info);
186 
187 	list_add(&pool_obj->list_node, &pool_obj->pool->free_list);
188 }
189 
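/* Consume one CQE: on a successful completion, and also on a requester
 * error, the SQ consumer counter jumps past the WQE that generated the
 * CQE (wqe_head[idx] + 1); on a responder error it is simply incremented.
 * Only a successful completion returns CQ_OK.
 */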
190 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
191 {
192 	unsigned int idx;
193 	u8 opcode;
194 
195 	opcode = get_cqe_opcode(cqe64);
196 	if (opcode == MLX5_CQE_REQ_ERR) {
197 		idx = be16_to_cpu(cqe64->wqe_counter) &
198 			(dr_cq->qp->sq.wqe_cnt - 1);
199 		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
200 	} else if (opcode == MLX5_CQE_RESP_ERR) {
201 		++dr_cq->qp->sq.cc;
202 	} else {
203 		idx = be16_to_cpu(cqe64->wqe_counter) &
204 			(dr_cq->qp->sq.wqe_cnt - 1);
205 		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
206 
207 		return CQ_OK;
208 	}
209 
210 	return CQ_POLL_ERR;
211 }
212 
213 static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
214 {
215 	struct mlx5_cqe64 *cqe64;
216 	int err;
217 
218 	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
219 	if (!cqe64) {
220 		if (unlikely(dr_cq->mdev->state ==
221 			     MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
222 			mlx5_core_dbg_once(dr_cq->mdev,
223 					   "Polling CQ while device is shutting down\n");
224 			return CQ_POLL_ERR;
225 		}
226 		return CQ_EMPTY;
227 	}
228 
229 	mlx5_cqwq_pop(&dr_cq->wq);
230 	err = dr_parse_cqe(dr_cq, cqe64);
231 	mlx5_cqwq_update_db_record(&dr_cq->wq);
232 
233 	return err;
234 }
235 
236 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
237 {
238 	int npolled;
239 	int err = 0;
240 
241 	for (npolled = 0; npolled < ne; ++npolled) {
242 		err = dr_cq_poll_one(dr_cq);
243 		if (err != CQ_OK)
244 			break;
245 	}
246 
247 	return err == CQ_POLL_ERR ? err : npolled;
248 }
249 
250 static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr)
251 {
252 	return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) +
253 				  sizeof(struct mlx5_wqe_flow_update_ctrl_seg) +
254 				  sizeof(struct mlx5_wqe_header_modify_argument_update_seg));
255 }
256 
/* Calculate the send WQE size for the specific RC QP functionality we need */
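/* Rough worked example, assuming the usual segment sizes (ctrl and raddr
 * segments are 16 bytes each, the inline segment header is 4 bytes, a data
 * segment is 16 bytes, DR_STE_SIZE is 64 and a WQE basic block is 64 bytes):
 * inl_size = 32 + ALIGN(4 + 64, 16) = 112 and size = 32 + max_send_sge * 16,
 * so with a single SGE the result is ALIGN(max(48, 112, update_arg_size), 64),
 * i.e. two basic blocks (128 bytes) unless the GTA-argument WQE is larger.
 */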
258 static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr)
259 {
260 	int update_arg_size;
261 	int inl_size = 0;
262 	int tot_size;
263 	int size;
264 
265 	update_arg_size = dr_qp_get_args_update_send_wqe_size(attr);
266 
267 	size = sizeof(struct mlx5_wqe_ctrl_seg) +
268 	       sizeof(struct mlx5_wqe_raddr_seg);
269 	inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) +
270 				DR_STE_SIZE, 16);
271 
272 	size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg);
273 
274 	size = max(size, update_arg_size);
275 
276 	tot_size = max(size, inl_size);
277 
278 	return ALIGN(tot_size, MLX5_SEND_WQE_BB);
279 }
280 
281 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
282 					 struct dr_qp_init_attr *attr)
283 {
284 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
285 	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
286 	struct mlx5_wq_param wqp;
287 	struct mlx5dr_qp *dr_qp;
288 	int wqe_size;
289 	int inlen;
290 	void *qpc;
291 	void *in;
292 	int err;
293 
294 	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
295 	if (!dr_qp)
296 		return NULL;
297 
298 	wqp.buf_numa_node = mdev->priv.numa_node;
299 	wqp.db_numa_node = mdev->priv.numa_node;
300 
301 	dr_qp->rq.pc = 0;
302 	dr_qp->rq.cc = 0;
303 	dr_qp->rq.wqe_cnt = 256;
304 	dr_qp->sq.pc = 0;
305 	dr_qp->sq.cc = 0;
306 	dr_qp->sq.head = 0;
307 	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
308 
309 	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
310 	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
311 	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
312 	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
313 				&dr_qp->wq_ctrl);
314 	if (err) {
315 		mlx5_core_warn(mdev, "Can't create QP WQ\n");
316 		goto err_wq;
317 	}
318 
319 	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
320 				     sizeof(dr_qp->sq.wqe_head[0]),
321 				     GFP_KERNEL);
322 
323 	if (!dr_qp->sq.wqe_head) {
324 		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
325 		goto err_wqe_head;
326 	}
327 
328 	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
329 		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
330 		dr_qp->wq_ctrl.buf.npages;
331 	in = kvzalloc(inlen, GFP_KERNEL);
332 	if (!in) {
333 		err = -ENOMEM;
334 		goto err_in;
335 	}
336 
337 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
338 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
339 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
340 	MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
341 	MLX5_SET(qpc, qpc, pd, attr->pdn);
342 	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
343 	MLX5_SET(qpc, qpc, log_page_size,
344 		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
345 	MLX5_SET(qpc, qpc, fre, 1);
346 	MLX5_SET(qpc, qpc, rlky, 1);
347 	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
348 	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
349 	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
350 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
351 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
352 	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
353 	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
354 	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
355 	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
356 		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
357 	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
358 				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
359 							 in, pas));
360 
361 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
362 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
363 	dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
364 	kvfree(in);
365 	if (err)
366 		goto err_in;
367 	dr_qp->uar = attr->uar;
368 	wqe_size = dr_qp_calc_rc_send_wqe(attr);
369 	dr_qp->max_inline_data = min(wqe_size -
370 				     (sizeof(struct mlx5_wqe_ctrl_seg) +
371 				      sizeof(struct mlx5_wqe_raddr_seg) +
372 				      sizeof(struct mlx5_wqe_inline_seg)),
373 				     (2 * MLX5_SEND_WQE_BB -
374 				      (sizeof(struct mlx5_wqe_ctrl_seg) +
375 				       sizeof(struct mlx5_wqe_raddr_seg) +
376 				       sizeof(struct mlx5_wqe_inline_seg))));
377 
378 	return dr_qp;
379 
380 err_in:
381 	kfree(dr_qp->sq.wqe_head);
382 err_wqe_head:
383 	mlx5_wq_destroy(&dr_qp->wq_ctrl);
384 err_wq:
385 	kfree(dr_qp);
386 	return NULL;
387 }
388 
389 static void dr_destroy_qp(struct mlx5_core_dev *mdev,
390 			  struct mlx5dr_qp *dr_qp)
391 {
392 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
393 
394 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
395 	MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
396 	mlx5_cmd_exec_in(mdev, destroy_qp, in);
397 
398 	kfree(dr_qp->sq.wqe_head);
399 	mlx5_wq_destroy(&dr_qp->wq_ctrl);
400 	kfree(dr_qp);
401 }
402 
403 static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
404 {
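	/* Make the WQE contents visible before updating the SQ doorbell record */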
405 	dma_wmb();
406 	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);
407 
	/* After the wmb() the HW is aware of the new work */
409 	wmb();
410 
411 	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
412 }
413 
414 static void
415 dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
416 					u32 remote_addr,
417 					struct dr_data_seg *data_seg,
418 					int *size)
419 {
420 	struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
421 	struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;
422 
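	/* For a FLOW_TBL_ACCESS WQE the "remote address" is the modify-header
	 * argument object id, carried in the ctrl segment's general_id field.
	 */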
423 	wq_ctrl->general_id = cpu_to_be32(remote_addr);
424 	wq_flow_seg = (void *)(wq_ctrl + 1);
425 
426 	/* mlx5_wqe_flow_update_ctrl_seg - all reserved */
427 	memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
428 	wq_arg_seg = (void *)(wq_flow_seg + 1);
429 
430 	memcpy(wq_arg_seg->argument_list,
431 	       (void *)(uintptr_t)data_seg->addr,
432 	       data_seg->length);
433 
434 	*size = (sizeof(*wq_ctrl) +      /* WQE ctrl segment */
435 		 sizeof(*wq_flow_seg) +  /* WQE flow update ctrl seg - reserved */
436 		 sizeof(*wq_arg_seg)) /  /* WQE hdr modify arg seg - data */
437 		MLX5_SEND_WQE_DS;
438 }
439 
440 static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp,
441 			       struct dr_data_seg *data_seg, void *wqe)
442 {
443 	int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) +
444 				sizeof(struct mlx5_wqe_raddr_seg) +
445 				sizeof(struct mlx5_wqe_inline_seg);
446 	struct mlx5_wqe_inline_seg *seg;
447 	int left_space;
448 	int inl = 0;
449 	void *addr;
450 	int len;
451 	int idx;
452 
453 	seg = wqe;
454 	wqe += sizeof(*seg);
455 	addr = (void *)(unsigned long)(data_seg->addr);
456 	len  = data_seg->length;
457 	inl += len;
458 	left_space = MLX5_SEND_WQE_BB - inline_header_size;
459 
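	/* Inline data that does not fit into what is left of the first WQE
	 * basic block spills over into the next basic block.
	 */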
460 	if (likely(len > left_space)) {
461 		memcpy(wqe, addr, left_space);
462 		len -= left_space;
463 		addr += left_space;
464 		idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1);
465 		wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
466 	}
467 
468 	memcpy(wqe, addr, len);
469 
470 	if (likely(inl)) {
471 		seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
472 		return DIV_ROUND_UP(inl + sizeof(seg->byte_count),
473 				    MLX5_SEND_WQE_DS);
474 	} else {
475 		return 0;
476 	}
477 }
478 
479 static void
480 dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
481 				  struct mlx5_wqe_ctrl_seg *wq_ctrl,
482 				  u64 remote_addr,
483 				  u32 rkey,
484 				  struct dr_data_seg *data_seg,
485 				  unsigned int *size)
486 {
487 	struct mlx5_wqe_raddr_seg *wq_raddr;
488 	struct mlx5_wqe_data_seg *wq_dseg;
489 
490 	wq_raddr = (void *)(wq_ctrl + 1);
491 
492 	wq_raddr->raddr = cpu_to_be64(remote_addr);
493 	wq_raddr->rkey = cpu_to_be32(rkey);
494 	wq_raddr->reserved = 0;
495 
496 	wq_dseg = (void *)(wq_raddr + 1);
497 	/* WQE ctrl segment + WQE remote addr segment */
498 	*size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS;
499 
500 	if (data_seg->send_flags & IB_SEND_INLINE) {
501 		*size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg);
502 	} else {
503 		wq_dseg->byte_count = cpu_to_be32(data_seg->length);
504 		wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
505 		wq_dseg->addr = cpu_to_be64(data_seg->addr);
506 		*size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS;  /* WQE data segment */
507 	}
508 }
509 
510 static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
511 			    struct dr_data_seg *data_seg)
512 {
513 	wq_ctrl->signature = 0;
514 	wq_ctrl->rsvd[0] = 0;
515 	wq_ctrl->rsvd[1] = 0;
516 	wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
517 				MLX5_WQE_CTRL_CQ_UPDATE : 0;
518 	wq_ctrl->imm = 0;
519 }
520 
521 static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
522 			     u32 rkey, struct dr_data_seg *data_seg,
523 			     u32 opcode, bool notify_hw)
524 {
525 	struct mlx5_wqe_ctrl_seg *wq_ctrl;
526 	int opcode_mod = 0;
527 	unsigned int size;
528 	unsigned int idx;
529 
530 	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
531 
532 	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
533 	dr_set_ctrl_seg(wq_ctrl, data_seg);
534 
535 	switch (opcode) {
536 	case MLX5_OPCODE_RDMA_READ:
537 	case MLX5_OPCODE_RDMA_WRITE:
538 		dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr,
539 						  rkey, data_seg, &size);
540 		break;
541 	case MLX5_OPCODE_FLOW_TBL_ACCESS:
542 		opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
543 		dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
544 							data_seg, &size);
545 		break;
546 	default:
547 		WARN(true, "illegal opcode %d", opcode);
548 		return;
549 	}
550 
	/* ---------------------------------------------------------
	 * |opcode_mod (8 bits)|wqe_index (16 bits)|opcode (8 bits)|
	 * ---------------------------------------------------------
	 */
555 	wq_ctrl->opmod_idx_opcode =
556 		cpu_to_be32((opcode_mod << 24) |
557 			    ((dr_qp->sq.pc & 0xffff) << 8) |
558 			    opcode);
559 	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
560 
561 	dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
562 	dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;
563 
564 	if (notify_hw)
565 		dr_cmd_notify_hw(dr_qp, wq_ctrl);
566 }
567 
568 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
569 {
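	/* An ICM write is posted as an RDMA_WRITE followed by an RDMA_READ of
	 * the same length into a dedicated sync buffer; the doorbell is rung
	 * only after both WQEs are posted, and a completed read implies that
	 * the preceding write on this RC QP has been executed.
	 */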
570 	if (send_info->type == WRITE_ICM) {
571 		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
572 				 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
573 		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
574 				 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
575 	} else { /* GTA_ARG */
576 		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
577 				 &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
578 	}
580 }
581 
/**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with send_list parameters:
 *
 *     @ste:       The STE to which this data is attached
 *     @size:      Size of the data to write
 *     @offset:    Offset of the data from the start of the hw_ste entry
 *     @data:      Data to write
 *     @ste_info:  STE info to be sent with the send_list
 *     @send_list: The list to append to
 *     @copy_data: If true, the data is copied and kept, since it is not
 *                 backed up anywhere else (e.g. during re-hash).
 *                 If false, the data may still be updated after it has
 *                 been added to the list.
 */
597 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
598 					       u16 offset, u8 *data,
599 					       struct mlx5dr_ste_send_info *ste_info,
600 					       struct list_head *send_list,
601 					       bool copy_data)
602 {
603 	ste_info->size = size;
604 	ste_info->ste = ste;
605 	ste_info->offset = offset;
606 
607 	if (copy_data) {
608 		memcpy(ste_info->data_cont, data, size);
609 		ste_info->data = ste_info->data_cont;
610 	} else {
611 		ste_info->data = data;
612 	}
613 
614 	list_add_tail(&ste_info->send_list, send_list);
615 }
616 
/* The function tries to consume one wc each time, unless the queue is full.
 * In that case, which means that the HW is behind the SW by a full queue
 * length, the function will drain the CQ until it is empty.
 */
621 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
622 				struct mlx5dr_send_ring *send_ring)
623 {
624 	bool is_drain = false;
625 	int ne;
626 
627 	if (send_ring->pending_wqe < send_ring->signal_th)
628 		return 0;
629 
	/* Queue is full, start draining it */
631 	if (send_ring->pending_wqe >=
632 	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
633 		is_drain = true;
634 
635 	do {
636 		ne = dr_poll_cq(send_ring->cq, 1);
637 		if (unlikely(ne < 0)) {
638 			mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
639 					    send_ring->qp->qpn);
640 			send_ring->err_state = true;
641 			return ne;
642 		} else if (ne == 1) {
643 			send_ring->pending_wqe -= send_ring->signal_th;
644 		}
645 	} while (ne == 1 ||
		 (is_drain && send_ring->pending_wqe >= send_ring->signal_th));
647 
648 	return 0;
649 }
650 
651 static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
652 				    struct postsend_info *send_info)
653 {
654 	send_ring->pending_wqe++;
655 
656 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
657 		send_info->write.send_flags |= IB_SEND_SIGNALED;
658 	else
659 		send_info->write.send_flags &= ~IB_SEND_SIGNALED;
660 }
661 
662 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
663 				   struct mlx5dr_send_ring *send_ring,
664 				   struct postsend_info *send_info)
665 {
666 	u32 buff_offset;
667 
668 	if (send_info->write.length > dmn->info.max_inline_size) {
669 		buff_offset = (send_ring->tx_head &
670 			       (dmn->send_ring->signal_th - 1)) *
671 			      send_ring->max_post_send_size;
672 		/* Copy to ring mr */
673 		memcpy(send_ring->buf + buff_offset,
674 		       (void *)(uintptr_t)send_info->write.addr,
675 		       send_info->write.length);
676 		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
677 		send_info->write.lkey = send_ring->mr->mkey;
678 
679 		send_ring->tx_head++;
680 	}
681 
682 	send_ring->pending_wqe++;
683 	if (!send_info->write.lkey)
684 		send_info->write.send_flags |= IB_SEND_INLINE;
685 
686 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
687 		send_info->write.send_flags |= IB_SEND_SIGNALED;
688 	else
689 		send_info->write.send_flags &= ~IB_SEND_SIGNALED;
690 
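	/* The RDMA_READ that follows the write counts as a pending WQE too */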
691 	send_ring->pending_wqe++;
692 	send_info->read.length = send_info->write.length;
693 
694 	/* Read into dedicated sync buffer */
695 	send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
696 	send_info->read.lkey = send_ring->sync_mr->mkey;
697 
698 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
699 		send_info->read.send_flags |= IB_SEND_SIGNALED;
700 	else
701 		send_info->read.send_flags &= ~IB_SEND_SIGNALED;
702 }
703 
704 static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
705 			      struct mlx5dr_send_ring *send_ring,
706 			      struct postsend_info *send_info)
707 {
708 	if (send_info->type == WRITE_ICM)
709 		dr_fill_write_icm_segs(dmn, send_ring, send_info);
710 	else /* args */
711 		dr_fill_write_args_segs(send_ring, send_info);
712 }
713 
714 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
715 				struct postsend_info *send_info)
716 {
717 	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
718 	int ret;
719 
720 	if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
721 		     send_ring->err_state)) {
722 		mlx5_core_dbg_once(dmn->mdev,
723 				   "Skipping post send: QP err state: %d, device state: %d\n",
724 				   send_ring->err_state, dmn->mdev->state);
725 		return 0;
726 	}
727 
728 	spin_lock(&send_ring->lock);
729 
730 	ret = dr_handle_pending_wc(dmn, send_ring);
731 	if (ret)
732 		goto out_unlock;
733 
734 	dr_fill_data_segs(dmn, send_ring, send_info);
735 	dr_post_send(send_ring->qp, send_info);
736 
737 out_unlock:
738 	spin_unlock(&send_ring->lock);
739 	return ret;
740 }
741 
742 static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
743 				   struct mlx5dr_ste_htbl *htbl,
744 				   u8 **data,
745 				   u32 *byte_size,
746 				   int *iterations,
747 				   int *num_stes)
748 {
749 	u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
750 	int alloc_size;
751 
752 	if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
753 		*iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
754 		*byte_size = dmn->send_ring->max_post_send_size;
755 		alloc_size = *byte_size;
756 		*num_stes = *byte_size / DR_STE_SIZE;
757 	} else {
758 		*iterations = 1;
759 		*num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
760 		alloc_size = *num_stes * DR_STE_SIZE;
761 	}
762 
763 	*data = kvzalloc(alloc_size, GFP_KERNEL);
764 	if (!*data)
765 		return -ENOMEM;
766 
767 	return 0;
768 }
769 
/**
 * mlx5dr_send_postsend_ste: Write 'size' bytes at 'offset' into the HW ICM.
 *
 *     @dmn:    Domain
 *     @ste:    The STE structure that contains the data (at
 *              least part of it)
 *     @data:   The actual data to send
 *     @size:   Number of bytes to write
 *     @offset: Offset from the ICM-mapped data at which to start
 *              writing; use this to write only part of the
 *              buffer.
 *
 * Return: 0 on success.
 */
784 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
785 			     u8 *data, u16 size, u16 offset)
786 {
787 	struct postsend_info send_info = {};
788 
789 	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);
790 
791 	send_info.write.addr = (uintptr_t)data;
792 	send_info.write.length = size;
793 	send_info.write.lkey = 0;
794 	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
795 	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
796 
797 	return dr_postsend_icm_data(dmn, &send_info);
798 }
799 
800 int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
801 			      struct mlx5dr_ste_htbl *htbl,
802 			      u8 *formatted_ste, u8 *mask)
803 {
804 	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
805 	int num_stes_per_iter;
806 	int iterations;
807 	u8 *data;
808 	int ret;
809 	int i;
810 	int j;
811 
812 	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
813 				      &iterations, &num_stes_per_iter);
814 	if (ret)
815 		return ret;
816 
817 	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);
818 
	/* Send the data 'iterations' times */
820 	for (i = 0; i < iterations; i++) {
821 		u32 ste_index = i * (byte_size / DR_STE_SIZE);
822 		struct postsend_info send_info = {};
823 
		/* Copy all the STEs into the data buffer;
		 * the bit_mask needs to be added as well.
		 */
827 		for (j = 0; j < num_stes_per_iter; j++) {
828 			struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
829 			u32 ste_off = j * DR_STE_SIZE;
830 
831 			if (mlx5dr_ste_is_not_used(ste)) {
832 				memcpy(data + ste_off,
833 				       formatted_ste, DR_STE_SIZE);
834 			} else {
835 				/* Copy data */
836 				memcpy(data + ste_off,
837 				       htbl->chunk->hw_ste_arr +
838 				       DR_STE_SIZE_REDUCED * (ste_index + j),
839 				       DR_STE_SIZE_REDUCED);
840 				/* Copy bit_mask */
841 				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
842 				       mask, DR_STE_SIZE_MASK);
				/* The STE needs to be re-arranged only when there is a mask */
844 				mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
845 								data + (j * DR_STE_SIZE),
846 								DR_STE_SIZE);
847 			}
848 		}
849 
850 		send_info.write.addr = (uintptr_t)data;
851 		send_info.write.length = byte_size;
852 		send_info.write.lkey = 0;
853 		send_info.remote_addr =
854 			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
855 		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
856 
857 		ret = dr_postsend_icm_data(dmn, &send_info);
858 		if (ret)
859 			goto out_free;
860 	}
861 
862 out_free:
863 	kvfree(data);
864 	return ret;
865 }
866 
/* Initialize htbl with default STEs */
868 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
869 					struct mlx5dr_ste_htbl *htbl,
870 					u8 *ste_init_data,
871 					bool update_hw_ste)
872 {
873 	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
874 	int iterations;
875 	int num_stes;
876 	u8 *copy_dst;
877 	u8 *data;
878 	int ret;
879 	int i;
880 
881 	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
882 				      &iterations, &num_stes);
883 	if (ret)
884 		return ret;
885 
886 	if (update_hw_ste) {
		/* Copy the reduced STE into the hash table's hw_ste_arr */
888 		for (i = 0; i < num_stes; i++) {
889 			copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
890 			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
891 		}
892 	}
893 
894 	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);
895 
	/* Copy the same STE into the data buffer */
897 	for (i = 0; i < num_stes; i++) {
898 		copy_dst = data + i * DR_STE_SIZE;
899 		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
900 	}
901 
	/* Send the data 'iterations' times */
903 	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
905 		struct postsend_info send_info = {};
906 
907 		send_info.write.addr = (uintptr_t)data;
908 		send_info.write.length = byte_size;
909 		send_info.write.lkey = 0;
910 		send_info.remote_addr =
911 			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
912 		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
913 
914 		ret = dr_postsend_icm_data(dmn, &send_info);
915 		if (ret)
916 			goto out_free;
917 	}
918 
919 out_free:
920 	kvfree(data);
921 	return ret;
922 }
923 
924 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
925 				struct mlx5dr_action *action)
926 {
927 	struct postsend_info send_info = {};
928 
929 	send_info.write.addr = (uintptr_t)action->rewrite->data;
930 	send_info.write.length = action->rewrite->num_of_actions *
931 				 DR_MODIFY_ACTION_SIZE;
932 	send_info.write.lkey = 0;
933 	send_info.remote_addr =
934 		mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
935 	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
936 
937 	return dr_postsend_icm_data(dmn, &send_info);
938 }
939 
940 int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
941 				 struct mlx5dr_icm_chunk *chunk,
942 				 u16 num_of_actions,
943 				 u8 *data)
944 {
945 	struct postsend_info send_info = {};
946 	int ret;
947 
948 	send_info.write.addr = (uintptr_t)data;
949 	send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
950 	send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
951 	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);
952 
953 	ret = dr_postsend_icm_data(dmn, &send_info);
954 	if (ret)
955 		return ret;
956 
957 	return 0;
958 }
959 
960 int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
961 			      u16 num_of_actions, u8 *actions_data)
962 {
963 	int data_len, iter = 0, cur_sent;
964 	u64 addr;
965 	int ret;
966 
967 	addr = (uintptr_t)actions_data;
968 	data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;
969 
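	/* The argument data is written in DR_ACTION_CACHE_LINE_SIZE chunks;
	 * the remote address is the argument object id, advanced by one for
	 * each chunk sent.
	 */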
970 	do {
971 		struct postsend_info send_info = {};
972 
973 		send_info.type = GTA_ARG;
974 		send_info.write.addr = addr;
975 		cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);
976 		send_info.write.length = cur_sent;
977 		send_info.write.lkey = 0;
978 		send_info.remote_addr = arg_id + iter;
979 
980 		ret = dr_postsend_icm_data(dmn, &send_info);
981 		if (ret)
982 			goto out;
983 
984 		iter++;
985 		addr += cur_sent;
986 		data_len -= cur_sent;
987 	} while (data_len > 0);
988 
989 out:
990 	return ret;
991 }
992 
993 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
994 				 struct mlx5dr_qp *dr_qp,
995 				 int port)
996 {
997 	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
998 	void *qpc;
999 
1000 	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
1001 
1002 	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
1003 	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
1004 	MLX5_SET(qpc, qpc, rre, 1);
1005 	MLX5_SET(qpc, qpc, rwe, 1);
1006 
1007 	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
1008 	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
1009 
1010 	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
1011 }
1012 
1013 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
1014 				    struct mlx5dr_qp *dr_qp,
1015 				    struct dr_qp_rts_attr *attr)
1016 {
1017 	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
1018 	void *qpc;
1019 
1020 	qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
1021 
1022 	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
1023 
1024 	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
1025 	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
1026 	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
1027 
1028 	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
1029 	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
1030 
1031 	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
1032 }
1033 
1034 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
1035 				     struct mlx5dr_qp *dr_qp,
1036 				     struct dr_qp_rtr_attr *attr)
1037 {
1038 	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
1039 	void *qpc;
1040 
1041 	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
1042 
1043 	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
1044 
1045 	MLX5_SET(qpc, qpc, mtu, attr->mtu);
1046 	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
1047 	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
1048 	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
1049 	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
1050 	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
1051 	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
1052 	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
1053 		 attr->sgid_index);
1054 
1055 	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
1056 		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
1057 			 attr->udp_src_port);
1058 
1059 	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
1060 	MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
1061 	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
1062 
1063 	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
1064 	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
1065 
1066 	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
1067 }
1068 
1069 static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
1070 {
1071 	/* Check whether RC RoCE QP creation with force loopback is allowed.
1072 	 * There are two separate capability bits for this:
1073 	 *  - force loopback when RoCE is enabled
1074 	 *  - force loopback when RoCE is disabled
1075 	 */
1076 	return ((caps->roce_caps.roce_en &&
1077 		 caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
1078 		(!caps->roce_caps.roce_en &&
1079 		 caps->roce_caps.fl_rc_qp_when_roce_disabled));
1080 }
1081 
1082 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
1083 {
1084 	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
1085 	struct dr_qp_rts_attr rts_attr = {};
1086 	struct dr_qp_rtr_attr rtr_attr = {};
1087 	enum ib_mtu mtu = IB_MTU_1024;
1088 	u16 gid_index = 0;
1089 	int port = 1;
1090 	int ret;
1091 
1092 	/* Init */
1093 	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
1094 	if (ret) {
1095 		mlx5dr_err(dmn, "Failed modify QP rst2init\n");
1096 		return ret;
1097 	}
1098 
1099 	/* RTR */
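	/* The RC QP is connected to itself (loopback): the remote QP number
	 * used for the RTR transition is our own QPN.
	 */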
1100 	rtr_attr.mtu		= mtu;
1101 	rtr_attr.qp_num		= dr_qp->qpn;
1102 	rtr_attr.min_rnr_timer	= 12;
1103 	rtr_attr.port_num	= port;
1104 	rtr_attr.udp_src_port	= dmn->info.caps.roce_min_src_udp;
1105 
1106 	/* If QP creation with force loopback is allowed, then there
1107 	 * is no need for GID index when creating the QP.
1108 	 * Otherwise we query GID attributes and use GID index.
1109 	 */
1110 	rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
1111 	if (!rtr_attr.fl) {
1112 		ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
1113 					   &rtr_attr.dgid_attr);
1114 		if (ret)
1115 			return ret;
1116 
1117 		rtr_attr.sgid_index = gid_index;
1118 	}
1119 
1120 	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
1121 	if (ret) {
1122 		mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
1123 		return ret;
1124 	}
1125 
1126 	/* RTS */
1127 	rts_attr.timeout	= 14;
1128 	rts_attr.retry_cnt	= 7;
1129 	rts_attr.rnr_retry	= 7;
1130 
1131 	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
1132 	if (ret) {
1133 		mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
1134 		return ret;
1135 	}
1136 
1137 	return 0;
1138 }
1139 
1140 static void dr_cq_complete(struct mlx5_core_cq *mcq,
1141 			   struct mlx5_eqe *eqe)
1142 {
1143 	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
1144 }
1145 
1146 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
1147 				      struct mlx5_uars_page *uar,
1148 				      size_t ncqe)
1149 {
1150 	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
1151 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
1152 	struct mlx5_wq_param wqp;
1153 	struct mlx5_cqe64 *cqe;
1154 	struct mlx5dr_cq *cq;
1155 	int inlen, err, eqn;
1156 	void *cqc, *in;
1157 	__be64 *pas;
1158 	int vector;
1159 	u32 i;
1160 
1161 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
1162 	if (!cq)
1163 		return NULL;
1164 
1165 	ncqe = roundup_pow_of_two(ncqe);
1166 	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
1167 
1168 	wqp.buf_numa_node = mdev->priv.numa_node;
1169 	wqp.db_numa_node = mdev->priv.numa_node;
1170 
1171 	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
1172 			       &cq->wq_ctrl);
1173 	if (err)
1174 		goto out;
1175 
1176 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1177 		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1178 		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
1179 	}
1180 
1181 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1182 		sizeof(u64) * cq->wq_ctrl.buf.npages;
1183 	in = kvzalloc(inlen, GFP_KERNEL);
1184 	if (!in)
1185 		goto err_cqwq;
1186 
1187 	vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
1188 	err = mlx5_comp_eqn_get(mdev, vector, &eqn);
1189 	if (err) {
1190 		kvfree(in);
1191 		goto err_cqwq;
1192 	}
1193 
1194 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1195 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
1196 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
1197 	MLX5_SET(cqc, cqc, uar_page, uar->index);
1198 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1199 		 MLX5_ADAPTER_PAGE_SHIFT);
1200 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1201 
1202 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
1203 	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
1204 
1205 	cq->mcq.comp  = dr_cq_complete;
1206 
1207 	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
1208 	kvfree(in);
1209 
1210 	if (err)
1211 		goto err_cqwq;
1212 
1213 	cq->mcq.cqe_sz = 64;
1214 	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
1215 	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
1216 	*cq->mcq.set_ci_db = 0;
1217 
	/* Set a non-zero value in order to prevent the HW from running
	 * db-recovery on a CQ that is used in polling mode.
	 */
1221 	*cq->mcq.arm_db = cpu_to_be32(2 << 28);
1222 
1223 	cq->mcq.vector = 0;
1224 	cq->mcq.uar = uar;
1225 	cq->mdev = mdev;
1226 
1227 	return cq;
1228 
1229 err_cqwq:
1230 	mlx5_wq_destroy(&cq->wq_ctrl);
1231 out:
1232 	kfree(cq);
1233 	return NULL;
1234 }
1235 
1236 static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
1237 {
1238 	mlx5_core_destroy_cq(mdev, &cq->mcq);
1239 	mlx5_wq_destroy(&cq->wq_ctrl);
1240 	kfree(cq);
1241 }
1242 
1243 static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
1244 {
1245 	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
1246 	void *mkc;
1247 
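	/* Create a physical-address mkey with length64 set, so a single key
	 * covers the whole address space and can be used for any DMA address
	 * in this PD.
	 */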
1248 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1249 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
1250 	MLX5_SET(mkc, mkc, a, 1);
1251 	MLX5_SET(mkc, mkc, rw, 1);
1252 	MLX5_SET(mkc, mkc, rr, 1);
1253 	MLX5_SET(mkc, mkc, lw, 1);
1254 	MLX5_SET(mkc, mkc, lr, 1);
1255 
1256 	MLX5_SET(mkc, mkc, pd, pdn);
1257 	MLX5_SET(mkc, mkc, length64, 1);
1258 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1259 
1260 	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
1261 }
1262 
1263 static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
1264 				   u32 pdn, void *buf, size_t size)
1265 {
1266 	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1267 	struct device *dma_device;
1268 	dma_addr_t dma_addr;
1269 	int err;
1270 
1271 	if (!mr)
1272 		return NULL;
1273 
1274 	dma_device = mlx5_core_dma_dev(mdev);
1275 	dma_addr = dma_map_single(dma_device, buf, size,
1276 				  DMA_BIDIRECTIONAL);
1277 	err = dma_mapping_error(dma_device, dma_addr);
1278 	if (err) {
1279 		mlx5_core_warn(mdev, "Can't dma buf\n");
1280 		kfree(mr);
1281 		return NULL;
1282 	}
1283 
1284 	err = dr_create_mkey(mdev, pdn, &mr->mkey);
1285 	if (err) {
1286 		mlx5_core_warn(mdev, "Can't create mkey\n");
1287 		dma_unmap_single(dma_device, dma_addr, size,
1288 				 DMA_BIDIRECTIONAL);
1289 		kfree(mr);
1290 		return NULL;
1291 	}
1292 
1293 	mr->dma_addr = dma_addr;
1294 	mr->size = size;
1295 	mr->addr = buf;
1296 
1297 	return mr;
1298 }
1299 
1300 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
1301 {
1302 	mlx5_core_destroy_mkey(mdev, mr->mkey);
1303 	dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
1304 			 DMA_BIDIRECTIONAL);
1305 	kfree(mr);
1306 }
1307 
1308 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
1309 {
1310 	struct dr_qp_init_attr init_attr = {};
1311 	int cq_size;
1312 	int size;
1313 	int ret;
1314 
1315 	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
1316 	if (!dmn->send_ring)
1317 		return -ENOMEM;
1318 
1319 	cq_size = QUEUE_SIZE + 1;
1320 	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
1321 	if (!dmn->send_ring->cq) {
1322 		mlx5dr_err(dmn, "Failed creating CQ\n");
1323 		ret = -ENOMEM;
1324 		goto free_send_ring;
1325 	}
1326 
1327 	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
1328 	init_attr.pdn = dmn->pdn;
1329 	init_attr.uar = dmn->uar;
	init_attr.max_send_wr = QUEUE_SIZE;
	init_attr.max_send_sge = 1; /* accounted for in the send WQE size calculation */
1331 
1332 	/* Isolated VL is applicable only if force loopback is supported */
1333 	if (dr_send_allow_fl(&dmn->info.caps))
1334 		init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
1335 
1336 	spin_lock_init(&dmn->send_ring->lock);
1337 
1338 	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
1339 	if (!dmn->send_ring->qp)  {
1340 		mlx5dr_err(dmn, "Failed creating QP\n");
1341 		ret = -ENOMEM;
1342 		goto clean_cq;
1343 	}
1344 
1345 	dmn->send_ring->cq->qp = dmn->send_ring->qp;
1346 
	dmn->info.max_send_wr = QUEUE_SIZE;
1349 	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
1350 					DR_STE_SIZE);
1351 
1352 	dmn->send_ring->signal_th = dmn->info.max_send_wr /
1353 		SIGNAL_PER_DIV_QUEUE;
1354 
1355 	/* Prepare qp to be used */
1356 	ret = dr_prepare_qp_to_rts(dmn);
1357 	if (ret)
1358 		goto clean_qp;
1359 
1360 	dmn->send_ring->max_post_send_size =
1361 		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
1362 						   DR_ICM_TYPE_STE);
1363 
1364 	/* Allocating the max size as a buffer for writing */
1365 	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
1366 	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
1367 	if (!dmn->send_ring->buf) {
1368 		ret = -ENOMEM;
1369 		goto clean_qp;
1370 	}
1371 
1372 	dmn->send_ring->buf_size = size;
1373 
1374 	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
1375 				       dmn->pdn, dmn->send_ring->buf, size);
1376 	if (!dmn->send_ring->mr) {
1377 		ret = -ENOMEM;
1378 		goto free_mem;
1379 	}
1380 
1381 	dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
1382 					    GFP_KERNEL);
1383 	if (!dmn->send_ring->sync_buff) {
1384 		ret = -ENOMEM;
1385 		goto clean_mr;
1386 	}
1387 
1388 	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
1389 					    dmn->pdn, dmn->send_ring->sync_buff,
1390 					    dmn->send_ring->max_post_send_size);
1391 	if (!dmn->send_ring->sync_mr) {
1392 		ret = -ENOMEM;
1393 		goto free_sync_mem;
1394 	}
1395 
1396 	return 0;
1397 
1398 free_sync_mem:
1399 	kfree(dmn->send_ring->sync_buff);
1400 clean_mr:
1401 	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
1402 free_mem:
1403 	kfree(dmn->send_ring->buf);
1404 clean_qp:
1405 	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
1406 clean_cq:
1407 	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
1408 free_send_ring:
1409 	kfree(dmn->send_ring);
1410 
1411 	return ret;
1412 }
1413 
1414 void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
1415 			   struct mlx5dr_send_ring *send_ring)
1416 {
1417 	dr_destroy_qp(dmn->mdev, send_ring->qp);
1418 	dr_destroy_cq(dmn->mdev, send_ring->cq);
1419 	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
1420 	dr_dereg_mr(dmn->mdev, send_ring->mr);
1421 	kfree(send_ring->buf);
1422 	kfree(send_ring->sync_buff);
1423 	kfree(send_ring);
1424 }
1425 
1426 int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
1427 {
1428 	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
1429 	struct postsend_info send_info = {};
1430 	u8 data[DR_STE_SIZE];
1431 	int num_of_sends_req;
1432 	int ret;
1433 	int i;
1434 
	/* Sending this number of requests makes sure we will drain the queue */
1436 	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
1437 
1438 	/* Send fake requests forcing the last to be signaled */
1439 	send_info.write.addr = (uintptr_t)data;
1440 	send_info.write.length = DR_STE_SIZE;
1441 	send_info.write.lkey = 0;
1442 	/* Using the sync_mr in order to write/read */
1443 	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
1444 	send_info.rkey = send_ring->sync_mr->mkey;
1445 
1446 	for (i = 0; i < num_of_sends_req; i++) {
1447 		ret = dr_postsend_icm_data(dmn, &send_info);
1448 		if (ret)
1449 			return ret;
1450 	}
1451 
1452 	spin_lock(&send_ring->lock);
1453 	ret = dr_handle_pending_wc(dmn, send_ring);
1454 	spin_unlock(&send_ring->lock);
1455 
1456 	return ret;
1457 }
1458