1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <linux/smp.h>
5 #include "dr_types.h"
6 
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the depth of the send queue - confirm where the send ring is set up.
 */
#define QUEUE_SIZE 128
/* NOTE(review): not referenced in the visible part of this file; presumably
 * used to derive the signalling threshold from the queue size - confirm.
 */
#define SIGNAL_PER_DIV_QUEUE 16
/* dr_handle_pending_wc() switches to drain mode once the number of pending
 * WQEs reaches signal_th multiplied by this factor.
 */
#define TH_NUMS_TO_DRAIN 2
/* Number of objects allocated by a single dr_send_info_pool_fill() call */
#define DR_SEND_INFO_POOL_SIZE 1000

/* Status codes returned by dr_parse_cqe() and dr_cq_poll_one() */
enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
13 
/* Local buffer description for one posted work request */
struct dr_data_seg {
	u64 addr;		/* buffer address; callers store a CPU pointer here, and it may later be replaced by a DMA address */
	u32 length;		/* number of bytes to transfer */
	u32 lkey;		/* local key written into the WQE data segment */
	unsigned int send_flags; /* tested against IB_SEND_SIGNALED when the WQE ctrl segment is built */
};
20 
/* Selects which WQEs dr_post_send() posts for a postsend_info */
enum send_info_type {
	WRITE_ICM = 0,	/* RDMA write followed by an RDMA read */
	GTA_ARG   = 1,	/* single FLOW_TBL_ACCESS WQE */
};
25 
/* Everything needed to post one logical send operation */
struct postsend_info {
	enum send_info_type type;	/* WRITE_ICM (zero value) or GTA_ARG */
	struct dr_data_seg write;	/* source data of the write / argument update */
	struct dr_data_seg read;	/* filled by dr_fill_write_icm_segs() for the trailing read */
	u64 remote_addr;		/* remote address, or the argument id for GTA_ARG */
	u32 rkey;			/* remote key placed in the remote address segment */
};
33 
/* Attributes consumed by dr_cmd_modify_qp_init2rtr() */
struct dr_qp_rtr_attr {
	struct mlx5dr_cmd_gid_attr dgid_attr;	/* mac and gid are copied into the primary address path; roce_ver gates udp_sport */
	enum ib_mtu mtu;			/* written to qpc.mtu */
	u32 qp_num;				/* written to qpc.remote_qpn */
	u16 port_num;				/* written to primary_address_path.vhca_port_num */
	u8 min_rnr_timer;			/* NOTE(review): not read by the visible init2rtr code, which sets min_rnr_nak to 1 */
	u8 sgid_index;				/* written to primary_address_path.src_addr_index */
	u16 udp_src_port;			/* written to udp_sport only when roce_ver is MLX5_ROCE_VERSION_2 */
	u8 fl:1;				/* written to primary_address_path.fl */
};
44 
/* Attributes consumed by dr_cmd_modify_qp_rtr2rts() */
struct dr_qp_rts_attr {
	u8 timeout;	/* NOTE(review): not read by the visible rtr2rts code, which hard-codes ack_timeout to 0x8 */
	u8 retry_cnt;	/* written to qpc.retry_count */
	u8 rnr_retry;	/* written to qpc.rnr_retry */
};
50 
/* Attributes consumed by dr_create_rc_qp() */
struct dr_qp_init_attr {
	u32 cqn;			/* used for both cqn_snd and cqn_rcv */
	u32 pdn;			/* written to qpc.pd */
	u32 max_send_wr;		/* rounded up to a power of two to get the SQ WQE count */
	struct mlx5_uars_page *uar;	/* its index goes to qpc.uar_page; the pointer is kept in the QP */
	u8 isolate_vl_tc:1;		/* written to qpc.isolate_vl_tc */
};
58 
/* Pool wrapper around one ste_send_info handed out to callers */
struct mlx5dr_send_info_pool_obj {
	struct mlx5dr_ste_send_info ste_send_info;	/* payload returned by mlx5dr_send_info_alloc() */
	struct mlx5dr_send_info_pool *pool;		/* owning pool, used on free to find the free list */
	struct list_head list_node;			/* linkage on the owning pool's free_list */
};
64 
/* Pool of preallocated send info objects */
struct mlx5dr_send_info_pool {
	struct list_head free_list;	/* objects currently available for allocation */
};
68 
dr_send_info_pool_fill(struct mlx5dr_send_info_pool * pool)69 static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool)
70 {
71 	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
72 	int i;
73 
74 	for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) {
75 		pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL);
76 		if (!pool_obj)
77 			goto clean_pool;
78 
79 		pool_obj->pool = pool;
80 		list_add_tail(&pool_obj->list_node, &pool->free_list);
81 	}
82 
83 	return 0;
84 
85 clean_pool:
86 	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
87 		list_del(&pool_obj->list_node);
88 		kfree(pool_obj);
89 	}
90 
91 	return -ENOMEM;
92 }
93 
dr_send_info_pool_destroy(struct mlx5dr_send_info_pool * pool)94 static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool)
95 {
96 	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
97 
98 	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
99 		list_del(&pool_obj->list_node);
100 		kfree(pool_obj);
101 	}
102 
103 	kfree(pool);
104 }
105 
mlx5dr_send_info_pool_destroy(struct mlx5dr_domain * dmn)106 void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn)
107 {
108 	dr_send_info_pool_destroy(dmn->send_info_pool_tx);
109 	dr_send_info_pool_destroy(dmn->send_info_pool_rx);
110 }
111 
dr_send_info_pool_create(void)112 static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void)
113 {
114 	struct mlx5dr_send_info_pool *pool;
115 	int ret;
116 
117 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
118 	if (!pool)
119 		return NULL;
120 
121 	INIT_LIST_HEAD(&pool->free_list);
122 
123 	ret = dr_send_info_pool_fill(pool);
124 	if (ret) {
125 		kfree(pool);
126 		return NULL;
127 	}
128 
129 	return pool;
130 }
131 
mlx5dr_send_info_pool_create(struct mlx5dr_domain * dmn)132 int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn)
133 {
134 	dmn->send_info_pool_rx = dr_send_info_pool_create();
135 	if (!dmn->send_info_pool_rx)
136 		return -ENOMEM;
137 
138 	dmn->send_info_pool_tx = dr_send_info_pool_create();
139 	if (!dmn->send_info_pool_tx) {
140 		dr_send_info_pool_destroy(dmn->send_info_pool_rx);
141 		return -ENOMEM;
142 	}
143 
144 	return 0;
145 }
146 
147 struct mlx5dr_ste_send_info
mlx5dr_send_info_alloc(struct mlx5dr_domain * dmn,enum mlx5dr_domain_nic_type nic_type)148 *mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn,
149 			enum mlx5dr_domain_nic_type nic_type)
150 {
151 	struct mlx5dr_send_info_pool_obj *pool_obj;
152 	struct mlx5dr_send_info_pool *pool;
153 	int ret;
154 
155 	pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx :
156 						   dmn->send_info_pool_tx;
157 
158 	if (unlikely(list_empty(&pool->free_list))) {
159 		ret = dr_send_info_pool_fill(pool);
160 		if (ret)
161 			return NULL;
162 	}
163 
164 	pool_obj = list_first_entry_or_null(&pool->free_list,
165 					    struct mlx5dr_send_info_pool_obj,
166 					    list_node);
167 
168 	if (likely(pool_obj)) {
169 		list_del_init(&pool_obj->list_node);
170 	} else {
171 		WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool");
172 		return NULL;
173 	}
174 
175 	return &pool_obj->ste_send_info;
176 }
177 
mlx5dr_send_info_free(struct mlx5dr_ste_send_info * ste_send_info)178 void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info)
179 {
180 	struct mlx5dr_send_info_pool_obj *pool_obj;
181 
182 	pool_obj = container_of(ste_send_info,
183 				struct mlx5dr_send_info_pool_obj,
184 				ste_send_info);
185 
186 	list_add(&pool_obj->list_node, &pool_obj->pool->free_list);
187 }
188 
dr_parse_cqe(struct mlx5dr_cq * dr_cq,struct mlx5_cqe64 * cqe64)189 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
190 {
191 	unsigned int idx;
192 	u8 opcode;
193 
194 	opcode = get_cqe_opcode(cqe64);
195 	if (opcode == MLX5_CQE_REQ_ERR) {
196 		idx = be16_to_cpu(cqe64->wqe_counter) &
197 			(dr_cq->qp->sq.wqe_cnt - 1);
198 		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
199 	} else if (opcode == MLX5_CQE_RESP_ERR) {
200 		++dr_cq->qp->sq.cc;
201 	} else {
202 		idx = be16_to_cpu(cqe64->wqe_counter) &
203 			(dr_cq->qp->sq.wqe_cnt - 1);
204 		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
205 
206 		return CQ_OK;
207 	}
208 
209 	return CQ_POLL_ERR;
210 }
211 
dr_cq_poll_one(struct mlx5dr_cq * dr_cq)212 static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
213 {
214 	struct mlx5_cqe64 *cqe64;
215 	int err;
216 
217 	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
218 	if (!cqe64) {
219 		if (unlikely(dr_cq->mdev->state ==
220 			     MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
221 			mlx5_core_dbg_once(dr_cq->mdev,
222 					   "Polling CQ while device is shutting down\n");
223 			return CQ_POLL_ERR;
224 		}
225 		return CQ_EMPTY;
226 	}
227 
228 	mlx5_cqwq_pop(&dr_cq->wq);
229 	err = dr_parse_cqe(dr_cq, cqe64);
230 	mlx5_cqwq_update_db_record(&dr_cq->wq);
231 
232 	return err;
233 }
234 
dr_poll_cq(struct mlx5dr_cq * dr_cq,int ne)235 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
236 {
237 	int npolled;
238 	int err = 0;
239 
240 	for (npolled = 0; npolled < ne; ++npolled) {
241 		err = dr_cq_poll_one(dr_cq);
242 		if (err != CQ_OK)
243 			break;
244 	}
245 
246 	return err == CQ_POLL_ERR ? err : npolled;
247 }
248 
/* Create an RC QP described by @attr on @mdev.
 *
 * Allocates the QP object, creates its work queue, allocates the SQ
 * wqe_head array, then builds and executes a CREATE_QP command.
 *
 * Return: the new QP, or NULL on any failure (everything allocated up to
 * the failing step is released).
 */
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
					 struct dr_qp_init_attr *attr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
	struct mlx5_wq_param wqp;
	struct mlx5dr_qp *dr_qp;
	int inlen;
	void *qpc;
	void *in;
	int err;

	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
	if (!dr_qp)
		return NULL;

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	dr_qp->rq.pc = 0;
	dr_qp->rq.cc = 0;
	dr_qp->rq.wqe_cnt = 256;
	dr_qp->sq.pc = 0;
	dr_qp->sq.cc = 0;
	dr_qp->sq.head = 0;
	/* SQ indices are masked with (wqe_cnt - 1), so keep it a power of two */
	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

	/* Temporary QPC carrying only the queue geometry for WQ creation */
	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
				&dr_qp->wq_ctrl);
	if (err) {
		mlx5_core_warn(mdev, "Can't create QP WQ\n");
		goto err_wq;
	}

	/* One entry per SQ WQE slot, read back when completions are parsed */
	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
				     sizeof(dr_qp->sq.wqe_head[0]),
				     GFP_KERNEL);

	if (!dr_qp->sq.wqe_head) {
		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
		goto err_wqe_head;
	}

	/* Command layout: fixed part plus one pas[] entry per WQ buffer page */
	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		dr_qp->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
	MLX5_SET(qpc, qpc, pd, attr->pdn);
	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);
	/* The same CQ serves both send and receive completions */
	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 in, pas));

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	/* qpn is read before err is checked; on failure the QP is freed below */
	dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	kvfree(in);
	if (err)
		goto err_in;
	dr_qp->uar = attr->uar;

	return dr_qp;

	/* Unwind in reverse order of acquisition */
err_in:
	kfree(dr_qp->sq.wqe_head);
err_wqe_head:
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
	kfree(dr_qp);
	return NULL;
}
346 
dr_destroy_qp(struct mlx5_core_dev * mdev,struct mlx5dr_qp * dr_qp)347 static void dr_destroy_qp(struct mlx5_core_dev *mdev,
348 			  struct mlx5dr_qp *dr_qp)
349 {
350 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
351 
352 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
353 	MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
354 	mlx5_cmd_exec_in(mdev, destroy_qp, in);
355 
356 	kfree(dr_qp->sq.wqe_head);
357 	mlx5_wq_destroy(&dr_qp->wq_ctrl);
358 	kfree(dr_qp);
359 }
360 
/* Ring the doorbell for the WQEs posted so far.
 *
 * Publishes the low 16 bits of the SQ producer counter in the doorbell
 * record and then writes the control segment @ctrl to the UAR at
 * MLX5_BF_OFFSET.
 */
static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
	/* Order the preceding WQE stores before the doorbell record update */
	dma_wmb();
	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);

	/* The doorbell record must be visible before the UAR write below,
	 * which is what makes the hardware aware of the new work.
	 */
	wmb();

	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}
371 
372 static void
dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg * wq_ctrl,u32 remote_addr,struct dr_data_seg * data_seg,int * size)373 dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
374 					u32 remote_addr,
375 					struct dr_data_seg *data_seg,
376 					int *size)
377 {
378 	struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
379 	struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;
380 
381 	wq_ctrl->general_id = cpu_to_be32(remote_addr);
382 	wq_flow_seg = (void *)(wq_ctrl + 1);
383 
384 	/* mlx5_wqe_flow_update_ctrl_seg - all reserved */
385 	memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
386 	wq_arg_seg = (void *)(wq_flow_seg + 1);
387 
388 	memcpy(wq_arg_seg->argument_list,
389 	       (void *)(uintptr_t)data_seg->addr,
390 	       data_seg->length);
391 
392 	*size = (sizeof(*wq_ctrl) +      /* WQE ctrl segment */
393 		 sizeof(*wq_flow_seg) +  /* WQE flow update ctrl seg - reserved */
394 		 sizeof(*wq_arg_seg)) /  /* WQE hdr modify arg seg - data */
395 		MLX5_SEND_WQE_DS;
396 }
397 
398 static void
dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg * wq_ctrl,u64 remote_addr,u32 rkey,struct dr_data_seg * data_seg,unsigned int * size)399 dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
400 				  u64 remote_addr,
401 				  u32 rkey,
402 				  struct dr_data_seg *data_seg,
403 				  unsigned int *size)
404 {
405 	struct mlx5_wqe_raddr_seg *wq_raddr;
406 	struct mlx5_wqe_data_seg *wq_dseg;
407 
408 	wq_raddr = (void *)(wq_ctrl + 1);
409 
410 	wq_raddr->raddr = cpu_to_be64(remote_addr);
411 	wq_raddr->rkey = cpu_to_be32(rkey);
412 	wq_raddr->reserved = 0;
413 
414 	wq_dseg = (void *)(wq_raddr + 1);
415 
416 	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
417 	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
418 	wq_dseg->addr = cpu_to_be64(data_seg->addr);
419 
420 	*size = (sizeof(*wq_ctrl) +    /* WQE ctrl segment */
421 		 sizeof(*wq_dseg) +    /* WQE data segment */
422 		 sizeof(*wq_raddr)) /  /* WQE remote addr segment */
423 		MLX5_SEND_WQE_DS;
424 }
425 
dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg * wq_ctrl,struct dr_data_seg * data_seg)426 static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
427 			    struct dr_data_seg *data_seg)
428 {
429 	wq_ctrl->signature = 0;
430 	wq_ctrl->rsvd[0] = 0;
431 	wq_ctrl->rsvd[1] = 0;
432 	wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
433 				MLX5_WQE_CTRL_CQ_UPDATE : 0;
434 	wq_ctrl->imm = 0;
435 }
436 
/* Build one WQE at the current SQ producer position and optionally ring
 * the doorbell.
 *
 * RDMA read/write opcodes get remote address + data segments, and
 * FLOW_TBL_ACCESS gets the argument update segments. Any other opcode
 * triggers a warning and nothing is posted.
 */
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
			     u32 rkey, struct dr_data_seg *data_seg,
			     u32 opcode, bool notify_hw)
{
	unsigned int idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
	struct mlx5_wqe_ctrl_seg *wq_ctrl;
	int opcode_mod = 0;
	unsigned int size;

	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
	dr_set_ctrl_seg(wq_ctrl, data_seg);

	if (opcode == MLX5_OPCODE_RDMA_READ ||
	    opcode == MLX5_OPCODE_RDMA_WRITE) {
		dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
						  rkey, data_seg, &size);
	} else if (opcode == MLX5_OPCODE_FLOW_TBL_ACCESS) {
		opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
		dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
							data_seg, &size);
	} else {
		WARN(true, "illegal opcode %d", opcode);
		return;
	}

	/* opmod_idx_opcode layout:
	 *   opcode_mod (8 bits) | wqe_index (16 bits) | opcode (8 bits)
	 */
	wq_ctrl->opmod_idx_opcode =
		cpu_to_be32((opcode_mod << 24) |
			    ((dr_qp->sq.pc & 0xffff) << 8) |
			    opcode);
	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);

	/* size counts 16-byte units; the producer counter advances in WQE
	 * basic blocks
	 */
	dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
	dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;

	if (notify_hw)
		dr_cmd_notify_hw(dr_qp, wq_ctrl);
}
483 
dr_post_send(struct mlx5dr_qp * dr_qp,struct postsend_info * send_info)484 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
485 {
486 	if (send_info->type == WRITE_ICM) {
487 		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
488 				 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
489 		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
490 				 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
491 	} else { /* GTA_ARG */
492 		dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
493 				 &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
494 	}
495 
496 }
497 
498 /**
499  * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
500  * with send_list parameters:
501  *
502  *     @ste:       The data that attached to this specific ste
503  *     @size:      of data to write
504  *     @offset:    of the data from start of the hw_ste entry
505  *     @data:      data
506  *     @ste_info:  ste to be sent with send_list
507  *     @send_list: to append into it
508  *     @copy_data: if true indicates that the data should be kept because
509  *                 it's not backuped any where (like in re-hash).
510  *                 if false, it lets the data to be updated after
511  *                 it was added to the list.
512  */
mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste * ste,u16 size,u16 offset,u8 * data,struct mlx5dr_ste_send_info * ste_info,struct list_head * send_list,bool copy_data)513 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
514 					       u16 offset, u8 *data,
515 					       struct mlx5dr_ste_send_info *ste_info,
516 					       struct list_head *send_list,
517 					       bool copy_data)
518 {
519 	ste_info->size = size;
520 	ste_info->ste = ste;
521 	ste_info->offset = offset;
522 
523 	if (copy_data) {
524 		memcpy(ste_info->data_cont, data, size);
525 		ste_info->data = ste_info->data_cont;
526 	} else {
527 		ste_info->data = data;
528 	}
529 
530 	list_add_tail(&ste_info->send_list, send_list);
531 }
532 
533 /* The function tries to consume one wc each time, unless the queue is full, in
534  * that case, which means that the hw is behind the sw in a full queue len
535  * the function will drain the cq till it empty.
536  */
dr_handle_pending_wc(struct mlx5dr_domain * dmn,struct mlx5dr_send_ring * send_ring)537 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
538 				struct mlx5dr_send_ring *send_ring)
539 {
540 	bool is_drain = false;
541 	int ne;
542 
543 	if (send_ring->pending_wqe < send_ring->signal_th)
544 		return 0;
545 
546 	/* Queue is full start drain it */
547 	if (send_ring->pending_wqe >=
548 	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
549 		is_drain = true;
550 
551 	do {
552 		ne = dr_poll_cq(send_ring->cq, 1);
553 		if (unlikely(ne < 0)) {
554 			mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
555 					    send_ring->qp->qpn);
556 			send_ring->err_state = true;
557 			return ne;
558 		} else if (ne == 1) {
559 			send_ring->pending_wqe -= send_ring->signal_th;
560 		}
561 	} while (ne == 1 ||
562 		 (is_drain && send_ring->pending_wqe  >= send_ring->signal_th));
563 
564 	return 0;
565 }
566 
dr_fill_write_args_segs(struct mlx5dr_send_ring * send_ring,struct postsend_info * send_info)567 static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
568 				    struct postsend_info *send_info)
569 {
570 	send_ring->pending_wqe++;
571 
572 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
573 		send_info->write.send_flags |= IB_SEND_SIGNALED;
574 	else
575 		send_info->write.send_flags = 0;
576 }
577 
dr_fill_write_icm_segs(struct mlx5dr_domain * dmn,struct mlx5dr_send_ring * send_ring,struct postsend_info * send_info)578 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
579 				   struct mlx5dr_send_ring *send_ring,
580 				   struct postsend_info *send_info)
581 {
582 	u32 buff_offset;
583 
584 	if (send_info->write.length > dmn->info.max_inline_size) {
585 		buff_offset = (send_ring->tx_head &
586 			       (dmn->send_ring->signal_th - 1)) *
587 			      send_ring->max_post_send_size;
588 		/* Copy to ring mr */
589 		memcpy(send_ring->buf + buff_offset,
590 		       (void *)(uintptr_t)send_info->write.addr,
591 		       send_info->write.length);
592 		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
593 		send_info->write.lkey = send_ring->mr->mkey;
594 
595 		send_ring->tx_head++;
596 	}
597 
598 	send_ring->pending_wqe++;
599 
600 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
601 		send_info->write.send_flags |= IB_SEND_SIGNALED;
602 
603 	send_ring->pending_wqe++;
604 	send_info->read.length = send_info->write.length;
605 
606 	/* Read into dedicated sync buffer */
607 	send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
608 	send_info->read.lkey = send_ring->sync_mr->mkey;
609 
610 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
611 		send_info->read.send_flags = IB_SEND_SIGNALED;
612 	else
613 		send_info->read.send_flags = 0;
614 }
615 
dr_fill_data_segs(struct mlx5dr_domain * dmn,struct mlx5dr_send_ring * send_ring,struct postsend_info * send_info)616 static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
617 			      struct mlx5dr_send_ring *send_ring,
618 			      struct postsend_info *send_info)
619 {
620 	if (send_info->type == WRITE_ICM)
621 		dr_fill_write_icm_segs(dmn, send_ring, send_info);
622 	else /* args */
623 		dr_fill_write_args_segs(send_ring, send_info);
624 }
625 
dr_postsend_icm_data(struct mlx5dr_domain * dmn,struct postsend_info * send_info)626 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
627 				struct postsend_info *send_info)
628 {
629 	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
630 	int ret;
631 
632 	if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
633 		     send_ring->err_state)) {
634 		mlx5_core_dbg_once(dmn->mdev,
635 				   "Skipping post send: QP err state: %d, device state: %d\n",
636 				   send_ring->err_state, dmn->mdev->state);
637 		return 0;
638 	}
639 
640 	spin_lock(&send_ring->lock);
641 
642 	ret = dr_handle_pending_wc(dmn, send_ring);
643 	if (ret)
644 		goto out_unlock;
645 
646 	dr_fill_data_segs(dmn, send_ring, send_info);
647 	dr_post_send(send_ring->qp, send_info);
648 
649 out_unlock:
650 	spin_unlock(&send_ring->lock);
651 	return ret;
652 }
653 
/* Work out how a hash table chunk is split into post-send operations and
 * allocate a zeroed staging buffer for one operation.
 *
 * A chunk no larger than the ring's max post send size is sent in one
 * iteration and @byte_size is left untouched. A larger chunk is split into
 * max-post-send-size pieces and @byte_size is set to that piece size.
 *
 * Return: 0 on success, -ENOMEM if the buffer cannot be allocated. On
 * success the caller owns *@data and must kvfree() it.
 */
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
				   struct mlx5dr_ste_htbl *htbl,
				   u8 **data,
				   u32 *byte_size,
				   int *iterations,
				   int *num_stes)
{
	u32 chunk_bytes = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int buf_size;

	if (chunk_bytes <= dmn->send_ring->max_post_send_size) {
		*iterations = 1;
		*num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
		buf_size = *num_stes * DR_STE_SIZE;
	} else {
		*iterations = chunk_bytes / dmn->send_ring->max_post_send_size;
		*byte_size = dmn->send_ring->max_post_send_size;
		buf_size = *byte_size;
		*num_stes = *byte_size / DR_STE_SIZE;
	}

	*data = kvzalloc(buf_size, GFP_KERNEL);

	return *data ? 0 : -ENOMEM;
}
681 
682 /**
683  * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm.
684  *
685  *     @dmn:    Domain
686  *     @ste:    The ste struct that contains the data (at
687  *              least part of it)
688  *     @data:   The real data to send size data
689  *     @size:   for writing.
690  *     @offset: The offset from the icm mapped data to
691  *              start write to this for write only part of the
692  *              buffer.
693  *
694  * Return: 0 on success.
695  */
mlx5dr_send_postsend_ste(struct mlx5dr_domain * dmn,struct mlx5dr_ste * ste,u8 * data,u16 size,u16 offset)696 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
697 			     u8 *data, u16 size, u16 offset)
698 {
699 	struct postsend_info send_info = {};
700 
701 	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);
702 
703 	send_info.write.addr = (uintptr_t)data;
704 	send_info.write.length = size;
705 	send_info.write.lkey = 0;
706 	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
707 	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
708 
709 	return dr_postsend_icm_data(dmn, &send_info);
710 }
711 
/* Write a whole hash table to ICM.
 *
 * Unused entries are written as @formatted_ste. Used entries are rebuilt
 * from the reduced copy kept in the chunk plus @mask. The table is sent in
 * as many pieces as dr_get_tbl_copy_details() decides.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
			      struct mlx5dr_ste_htbl *htbl,
			      u8 *formatted_ste, u8 *mask)
{
	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
	int stes_per_iter;
	int iterations;
	u8 *buf;
	int iter;
	int ret;
	int n;

	ret = dr_get_tbl_copy_details(dmn, htbl, &buf, &byte_size,
				      &iterations, &stes_per_iter);
	if (ret)
		return ret;

	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);

	for (iter = 0; iter < iterations; iter++) {
		u32 first_ste = iter * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		/* Stage this piece's STEs in the buffer */
		for (n = 0; n < stes_per_iter; n++) {
			struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[first_ste + n];
			u8 *dst = buf + n * DR_STE_SIZE;

			if (mlx5dr_ste_is_not_used(ste)) {
				memcpy(dst, formatted_ste, DR_STE_SIZE);
				continue;
			}

			/* Reduced STE followed by the bit mask */
			memcpy(dst,
			       htbl->chunk->hw_ste_arr +
			       DR_STE_SIZE_REDUCED * (first_ste + n),
			       DR_STE_SIZE_REDUCED);
			memcpy(dst + DR_STE_SIZE_REDUCED,
			       mask, DR_STE_SIZE_MASK);
			/* Only an STE that carries a mask needs re-arranging */
			mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, dst,
							DR_STE_SIZE);
		}

		send_info.write.addr = (uintptr_t)buf;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + first_ste);
		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			break;
	}

	kvfree(buf);
	return ret;
}
778 
779 /* Initialize htble with default STEs */
mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain * dmn,struct mlx5dr_ste_htbl * htbl,u8 * ste_init_data,bool update_hw_ste)780 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
781 					struct mlx5dr_ste_htbl *htbl,
782 					u8 *ste_init_data,
783 					bool update_hw_ste)
784 {
785 	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
786 	int iterations;
787 	int num_stes;
788 	u8 *copy_dst;
789 	u8 *data;
790 	int ret;
791 	int i;
792 
793 	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
794 				      &iterations, &num_stes);
795 	if (ret)
796 		return ret;
797 
798 	if (update_hw_ste) {
799 		/* Copy the reduced STE to hash table ste_arr */
800 		for (i = 0; i < num_stes; i++) {
801 			copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
802 			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
803 		}
804 	}
805 
806 	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);
807 
808 	/* Copy the same STE on the data buffer */
809 	for (i = 0; i < num_stes; i++) {
810 		copy_dst = data + i * DR_STE_SIZE;
811 		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
812 	}
813 
814 	/* Send the data iteration times */
815 	for (i = 0; i < iterations; i++) {
816 		u8 ste_index = i * (byte_size / DR_STE_SIZE);
817 		struct postsend_info send_info = {};
818 
819 		send_info.write.addr = (uintptr_t)data;
820 		send_info.write.length = byte_size;
821 		send_info.write.lkey = 0;
822 		send_info.remote_addr =
823 			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
824 		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
825 
826 		ret = dr_postsend_icm_data(dmn, &send_info);
827 		if (ret)
828 			goto out_free;
829 	}
830 
831 out_free:
832 	kvfree(data);
833 	return ret;
834 }
835 
mlx5dr_send_postsend_action(struct mlx5dr_domain * dmn,struct mlx5dr_action * action)836 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
837 				struct mlx5dr_action *action)
838 {
839 	struct postsend_info send_info = {};
840 
841 	send_info.write.addr = (uintptr_t)action->rewrite->data;
842 	send_info.write.length = action->rewrite->num_of_actions *
843 				 DR_MODIFY_ACTION_SIZE;
844 	send_info.write.lkey = 0;
845 	send_info.remote_addr =
846 		mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
847 	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
848 
849 	return dr_postsend_icm_data(dmn, &send_info);
850 }
851 
/* Write @num_of_actions modify actions from @data to the ICM @chunk.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
				 struct mlx5dr_icm_chunk *chunk,
				 u16 num_of_actions,
				 u8 *data)
{
	struct postsend_info send_info = {};

	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
	send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);

	return dr_postsend_icm_data(dmn, &send_info);
}
871 
/* Send modify-header argument data in pieces of at most
 * DR_ACTION_CACHE_LINE_SIZE bytes; piece number k targets argument id
 * @arg_id + k. At least one piece is always posted.
 *
 * Return: 0 on success, or the first error reported while posting.
 */
int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
			      u16 num_of_actions, u8 *actions_data)
{
	int remaining = num_of_actions * DR_MODIFY_ACTION_SIZE;
	u64 src = (uintptr_t)actions_data;
	int piece = 0;
	int piece_len;
	int ret;

	for (;;) {
		struct postsend_info send_info = {};

		piece_len = min_t(u32, remaining, DR_ACTION_CACHE_LINE_SIZE);

		send_info.type = GTA_ARG;
		send_info.write.addr = src;
		send_info.write.length = piece_len;
		send_info.write.lkey = 0;
		send_info.remote_addr = arg_id + piece;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			break;

		piece++;
		src += piece_len;
		remaining -= piece_len;
		if (remaining <= 0)
			break;
	}

	return ret;
}
904 
dr_modify_qp_rst2init(struct mlx5_core_dev * mdev,struct mlx5dr_qp * dr_qp,int port)905 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
906 				 struct mlx5dr_qp *dr_qp,
907 				 int port)
908 {
909 	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
910 	void *qpc;
911 
912 	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
913 
914 	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
915 	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
916 	MLX5_SET(qpc, qpc, rre, 1);
917 	MLX5_SET(qpc, qpc, rwe, 1);
918 
919 	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
920 	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
921 
922 	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
923 }
924 
dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev * mdev,struct mlx5dr_qp * dr_qp,struct dr_qp_rts_attr * attr)925 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
926 				    struct mlx5dr_qp *dr_qp,
927 				    struct dr_qp_rts_attr *attr)
928 {
929 	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
930 	void *qpc;
931 
932 	qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
933 
934 	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
935 
936 	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
937 	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
938 	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
939 
940 	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
941 	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
942 
943 	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
944 }
945 
dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev * mdev,struct mlx5dr_qp * dr_qp,struct dr_qp_rtr_attr * attr)946 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
947 				     struct mlx5dr_qp *dr_qp,
948 				     struct dr_qp_rtr_attr *attr)
949 {
950 	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
951 	void *qpc;
952 
953 	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
954 
955 	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
956 
957 	MLX5_SET(qpc, qpc, mtu, attr->mtu);
958 	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
959 	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
960 	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
961 	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
962 	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
963 	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
964 	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
965 		 attr->sgid_index);
966 
967 	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
968 		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
969 			 attr->udp_src_port);
970 
971 	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
972 	MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
973 	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
974 
975 	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
976 	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
977 
978 	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
979 }
980 
dr_send_allow_fl(struct mlx5dr_cmd_caps * caps)981 static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
982 {
983 	/* Check whether RC RoCE QP creation with force loopback is allowed.
984 	 * There are two separate capability bits for this:
985 	 *  - force loopback when RoCE is enabled
986 	 *  - force loopback when RoCE is disabled
987 	 */
988 	return ((caps->roce_caps.roce_en &&
989 		 caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
990 		(!caps->roce_caps.roce_en &&
991 		 caps->roce_caps.fl_rc_qp_when_roce_disabled));
992 }
993 
dr_prepare_qp_to_rts(struct mlx5dr_domain * dmn)994 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
995 {
996 	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
997 	struct dr_qp_rts_attr rts_attr = {};
998 	struct dr_qp_rtr_attr rtr_attr = {};
999 	enum ib_mtu mtu = IB_MTU_1024;
1000 	u16 gid_index = 0;
1001 	int port = 1;
1002 	int ret;
1003 
1004 	/* Init */
1005 	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
1006 	if (ret) {
1007 		mlx5dr_err(dmn, "Failed modify QP rst2init\n");
1008 		return ret;
1009 	}
1010 
1011 	/* RTR */
1012 	rtr_attr.mtu		= mtu;
1013 	rtr_attr.qp_num		= dr_qp->qpn;
1014 	rtr_attr.min_rnr_timer	= 12;
1015 	rtr_attr.port_num	= port;
1016 	rtr_attr.udp_src_port	= dmn->info.caps.roce_min_src_udp;
1017 
1018 	/* If QP creation with force loopback is allowed, then there
1019 	 * is no need for GID index when creating the QP.
1020 	 * Otherwise we query GID attributes and use GID index.
1021 	 */
1022 	rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
1023 	if (!rtr_attr.fl) {
1024 		ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
1025 					   &rtr_attr.dgid_attr);
1026 		if (ret)
1027 			return ret;
1028 
1029 		rtr_attr.sgid_index = gid_index;
1030 	}
1031 
1032 	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
1033 	if (ret) {
1034 		mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
1035 		return ret;
1036 	}
1037 
1038 	/* RTS */
1039 	rts_attr.timeout	= 14;
1040 	rts_attr.retry_cnt	= 7;
1041 	rts_attr.rnr_retry	= 7;
1042 
1043 	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
1044 	if (ret) {
1045 		mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
1046 		return ret;
1047 	}
1048 
1049 	return 0;
1050 }
1051 
dr_cq_complete(struct mlx5_core_cq * mcq,struct mlx5_eqe * eqe)1052 static void dr_cq_complete(struct mlx5_core_cq *mcq,
1053 			   struct mlx5_eqe *eqe)
1054 {
1055 	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
1056 }
1057 
/* Create the completion queue used by the send ring.
 *
 * ncqe is rounded up to a power of two. The CQ work queue is allocated,
 * every CQE is marked invalid and HW-owned, and the CQ is then created
 * with a CREATE_CQ command built from the work queue's page list.
 *
 * Returns the new CQ, or NULL on any failure (everything allocated up to
 * the failing step is released).
 */
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
				      struct mlx5_uars_page *uar,
				      size_t ncqe)
{
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	u32 wq_cqc[MLX5_ST_SZ_DW(cqc)] = {};
	struct mlx5_wq_param wq_param;
	struct mlx5dr_cq *cq;
	int log_cq_size;
	int inlen, eqn;
	void *cqc, *in;
	int vector;
	u32 idx;
	int err;

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return NULL;

	ncqe = roundup_pow_of_two(ncqe);
	log_cq_size = ilog2(ncqe);
	MLX5_SET(cqc, wq_cqc, log_cq_size, log_cq_size);

	wq_param.buf_numa_node = mdev->priv.numa_node;
	wq_param.db_numa_node = mdev->priv.numa_node;

	err = mlx5_cqwq_create(mdev, &wq_param, wq_cqc, &cq->wq,
			       &cq->wq_ctrl);
	if (err)
		goto out;

	/* Start with every CQE invalid and with the owner bit set */
	for (idx = 0; idx < mlx5_cqwq_get_size(&cq->wq); idx++) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, idx);

		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
	}

	/* The command carries one 64-bit page address per buffer page */
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		goto err_cqwq;

	/* Pick the completion EQ from the CPU we are currently running on */
	vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
	err = mlx5_comp_eqn_get(mdev, vector, &eqn);
	if (err)
		goto err_free_in;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, uar->index);
	MLX5_SET(cqc, cqc, log_page_size,
		 cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));

	/* The completion handler must be in place before the CQ exists */
	cq->mcq.comp = dr_cq_complete;

	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_free_in;

	kvfree(in);

	cq->mcq.cqe_sz = 64;
	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
	*cq->mcq.set_ci_db = 0;

	/* Set a non-zero value so that the HW does not run db-recovery on
	 * a CQ that is used in polling mode.
	 */
	*cq->mcq.arm_db = cpu_to_be32(2 << 28);

	cq->mcq.vector = 0;
	cq->mcq.uar = uar;
	cq->mdev = mdev;

	return cq;

err_free_in:
	kvfree(in);
err_cqwq:
	mlx5_wq_destroy(&cq->wq_ctrl);
out:
	kfree(cq);
	return NULL;
}
1147 
dr_destroy_cq(struct mlx5_core_dev * mdev,struct mlx5dr_cq * cq)1148 static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
1149 {
1150 	mlx5_core_destroy_cq(mdev, &cq->mcq);
1151 	mlx5_wq_destroy(&cq->wq_ctrl);
1152 	kfree(cq);
1153 }
1154 
/* Create a memory key in protection domain pdn.
 *
 * The mkey context uses access mode MLX5_MKC_ACCESS_MODE_PA, has the
 * a, rw, rr, lw and lr bits set, length64 set, and qpn 0xffffff.
 *
 * The new key is stored in *mkey. Returns the result of
 * mlx5_core_create_mkey().
 */
static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
{
	u32 cmd_in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkey_ctx = MLX5_ADDR_OF(create_mkey_in, cmd_in,
				      memory_key_mkey_entry);

	/* Addressing and ownership */
	MLX5_SET(mkc, mkey_ctx, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkey_ctx, length64, 1);
	MLX5_SET(mkc, mkey_ctx, qpn, 0xffffff);
	MLX5_SET(mkc, mkey_ctx, pd, pdn);

	/* Access permission bits */
	MLX5_SET(mkc, mkey_ctx, lr, 1);
	MLX5_SET(mkc, mkey_ctx, lw, 1);
	MLX5_SET(mkc, mkey_ctx, rr, 1);
	MLX5_SET(mkc, mkey_ctx, rw, 1);
	MLX5_SET(mkc, mkey_ctx, a, 1);

	return mlx5_core_create_mkey(mdev, mkey, cmd_in, sizeof(cmd_in));
}
1174 
/* Register buf (size bytes) as a memory region.
 *
 * The buffer is DMA-mapped bidirectionally and a memory key is created
 * in protection domain pdn. The returned descriptor records the DMA
 * address, the size, the CPU address and the key.
 *
 * Returns the descriptor, or NULL on failure; on failure nothing stays
 * mapped or allocated. The buffer itself remains owned by the caller.
 */
static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
				   u32 pdn, void *buf, size_t size)
{
	struct device *dma_device = mlx5_core_dma_dev(mdev);
	struct mlx5dr_mr *mr;
	dma_addr_t dma_addr;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return NULL;

	dma_addr = dma_map_single(dma_device, buf, size, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dma_device, dma_addr)) {
		mlx5_core_warn(mdev, "Can't dma buf\n");
		goto err_free_mr;
	}

	if (dr_create_mkey(mdev, pdn, &mr->mkey)) {
		mlx5_core_warn(mdev, "Can't create mkey\n");
		goto err_unmap;
	}

	mr->addr = buf;
	mr->size = size;
	mr->dma_addr = dma_addr;

	return mr;

err_unmap:
	dma_unmap_single(dma_device, dma_addr, size, DMA_BIDIRECTIONAL);
err_free_mr:
	kfree(mr);
	return NULL;
}
1211 
dr_dereg_mr(struct mlx5_core_dev * mdev,struct mlx5dr_mr * mr)1212 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
1213 {
1214 	mlx5_core_destroy_mkey(mdev, mr->mkey);
1215 	dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
1216 			 DMA_BIDIRECTIONAL);
1217 	kfree(mr);
1218 }
1219 
mlx5dr_send_ring_alloc(struct mlx5dr_domain * dmn)1220 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
1221 {
1222 	struct dr_qp_init_attr init_attr = {};
1223 	int cq_size;
1224 	int size;
1225 	int ret;
1226 
1227 	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
1228 	if (!dmn->send_ring)
1229 		return -ENOMEM;
1230 
1231 	cq_size = QUEUE_SIZE + 1;
1232 	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
1233 	if (!dmn->send_ring->cq) {
1234 		mlx5dr_err(dmn, "Failed creating CQ\n");
1235 		ret = -ENOMEM;
1236 		goto free_send_ring;
1237 	}
1238 
1239 	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
1240 	init_attr.pdn = dmn->pdn;
1241 	init_attr.uar = dmn->uar;
1242 	init_attr.max_send_wr = QUEUE_SIZE;
1243 
1244 	/* Isolated VL is applicable only if force loopback is supported */
1245 	if (dr_send_allow_fl(&dmn->info.caps))
1246 		init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
1247 
1248 	spin_lock_init(&dmn->send_ring->lock);
1249 
1250 	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
1251 	if (!dmn->send_ring->qp)  {
1252 		mlx5dr_err(dmn, "Failed creating QP\n");
1253 		ret = -ENOMEM;
1254 		goto clean_cq;
1255 	}
1256 
1257 	dmn->send_ring->cq->qp = dmn->send_ring->qp;
1258 
1259 	dmn->info.max_send_wr = QUEUE_SIZE;
1260 	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
1261 					DR_STE_SIZE);
1262 
1263 	dmn->send_ring->signal_th = dmn->info.max_send_wr /
1264 		SIGNAL_PER_DIV_QUEUE;
1265 
1266 	/* Prepare qp to be used */
1267 	ret = dr_prepare_qp_to_rts(dmn);
1268 	if (ret)
1269 		goto clean_qp;
1270 
1271 	dmn->send_ring->max_post_send_size =
1272 		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
1273 						   DR_ICM_TYPE_STE);
1274 
1275 	/* Allocating the max size as a buffer for writing */
1276 	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
1277 	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
1278 	if (!dmn->send_ring->buf) {
1279 		ret = -ENOMEM;
1280 		goto clean_qp;
1281 	}
1282 
1283 	dmn->send_ring->buf_size = size;
1284 
1285 	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
1286 				       dmn->pdn, dmn->send_ring->buf, size);
1287 	if (!dmn->send_ring->mr) {
1288 		ret = -ENOMEM;
1289 		goto free_mem;
1290 	}
1291 
1292 	dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
1293 					    GFP_KERNEL);
1294 	if (!dmn->send_ring->sync_buff) {
1295 		ret = -ENOMEM;
1296 		goto clean_mr;
1297 	}
1298 
1299 	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
1300 					    dmn->pdn, dmn->send_ring->sync_buff,
1301 					    dmn->send_ring->max_post_send_size);
1302 	if (!dmn->send_ring->sync_mr) {
1303 		ret = -ENOMEM;
1304 		goto free_sync_mem;
1305 	}
1306 
1307 	return 0;
1308 
1309 free_sync_mem:
1310 	kfree(dmn->send_ring->sync_buff);
1311 clean_mr:
1312 	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
1313 free_mem:
1314 	kfree(dmn->send_ring->buf);
1315 clean_qp:
1316 	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
1317 clean_cq:
1318 	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
1319 free_send_ring:
1320 	kfree(dmn->send_ring);
1321 
1322 	return ret;
1323 }
1324 
mlx5dr_send_ring_free(struct mlx5dr_domain * dmn,struct mlx5dr_send_ring * send_ring)1325 void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
1326 			   struct mlx5dr_send_ring *send_ring)
1327 {
1328 	dr_destroy_qp(dmn->mdev, send_ring->qp);
1329 	dr_destroy_cq(dmn->mdev, send_ring->cq);
1330 	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
1331 	dr_dereg_mr(dmn->mdev, send_ring->mr);
1332 	kfree(send_ring->buf);
1333 	kfree(send_ring->sync_buff);
1334 	kfree(send_ring);
1335 }
1336 
mlx5dr_send_ring_force_drain(struct mlx5dr_domain * dmn)1337 int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
1338 {
1339 	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
1340 	struct postsend_info send_info = {};
1341 	u8 data[DR_STE_SIZE];
1342 	int num_of_sends_req;
1343 	int ret;
1344 	int i;
1345 
1346 	/* Sending this amount of requests makes sure we will get drain */
1347 	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
1348 
1349 	/* Send fake requests forcing the last to be signaled */
1350 	send_info.write.addr = (uintptr_t)data;
1351 	send_info.write.length = DR_STE_SIZE;
1352 	send_info.write.lkey = 0;
1353 	/* Using the sync_mr in order to write/read */
1354 	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
1355 	send_info.rkey = send_ring->sync_mr->mkey;
1356 
1357 	for (i = 0; i < num_of_sends_req; i++) {
1358 		ret = dr_postsend_icm_data(dmn, &send_info);
1359 		if (ret)
1360 			return ret;
1361 	}
1362 
1363 	spin_lock(&send_ring->lock);
1364 	ret = dr_handle_pending_wc(dmn, send_ring);
1365 	spin_unlock(&send_ring->lock);
1366 
1367 	return ret;
1368 }
1369