xref: /openbmc/linux/drivers/infiniband/hw/erdma/erdma_verbs.c (revision f4356947f0297b0962fdd197672db7edf9f58be6)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6 
7 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
8 /* Copyright (c) 2008-2019, IBM Corporation */
9 
10 /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
11 
12 #include <linux/vmalloc.h>
13 #include <net/addrconf.h>
14 #include <rdma/erdma-abi.h>
15 #include <rdma/ib_umem.h>
16 #include <rdma/uverbs_ioctl.h>
17 
18 #include "erdma.h"
19 #include "erdma_cm.h"
20 #include "erdma_verbs.h"
21 
22 static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp)
23 {
24 	struct erdma_cmdq_create_qp_req req;
25 	struct erdma_pd *pd = to_epd(qp->ibqp.pd);
26 	struct erdma_uqp *user_qp;
27 	u64 resp0, resp1;
28 	int err;
29 
30 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
31 				CMDQ_OPCODE_CREATE_QP);
32 
33 	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
34 			      ilog2(qp->attrs.sq_size)) |
35 		   FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
36 	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
37 			      ilog2(qp->attrs.rq_size)) |
38 		   FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
39 
40 	if (rdma_is_kernel_res(&qp->ibqp.res)) {
41 		u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
42 
43 		req.sq_cqn_mtt_cfg =
44 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
45 				   pgsz_range) |
46 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
47 		req.rq_cqn_mtt_cfg =
48 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
49 				   pgsz_range) |
50 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
51 
52 		req.sq_mtt_cfg =
53 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
54 			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
55 			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
56 				   ERDMA_MR_INLINE_MTT);
57 		req.rq_mtt_cfg = req.sq_mtt_cfg;
58 
59 		req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
60 		req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
61 		req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
62 					  (qp->attrs.sq_size << SQEBB_SHIFT);
63 		req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
64 					  (qp->attrs.rq_size << RQE_SHIFT);
65 	} else {
66 		user_qp = &qp->user_qp;
67 		req.sq_cqn_mtt_cfg = FIELD_PREP(
68 			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
69 			ilog2(user_qp->sq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT);
70 		req.sq_cqn_mtt_cfg |=
71 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
72 
73 		req.rq_cqn_mtt_cfg = FIELD_PREP(
74 			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
75 			ilog2(user_qp->rq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT);
76 		req.rq_cqn_mtt_cfg |=
77 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
78 
79 		req.sq_mtt_cfg = user_qp->sq_mtt.page_offset;
80 		req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
81 					     user_qp->sq_mtt.mtt_nents) |
82 				  FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
83 					     user_qp->sq_mtt.mtt_type);
84 
85 		req.rq_mtt_cfg = user_qp->rq_mtt.page_offset;
86 		req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
87 					     user_qp->rq_mtt.mtt_nents) |
88 				  FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
89 					     user_qp->rq_mtt.mtt_type);
90 
91 		req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0];
92 		req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0];
93 
94 		req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
95 		req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
96 	}
97 
98 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
99 				  &resp1);
100 	if (!err)
101 		qp->attrs.cookie =
102 			FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
103 
104 	return err;
105 }
106 
107 static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
108 {
109 	struct erdma_cmdq_reg_mr_req req;
110 	struct erdma_pd *pd = to_epd(mr->ibmr.pd);
111 	u64 *phy_addr;
112 	int i;
113 
114 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
115 
116 	req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
117 		   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
118 		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
119 	req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
120 		   FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
121 		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
122 	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
123 			      ilog2(mr->mem.page_size)) |
124 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
125 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
126 
127 	if (mr->type == ERDMA_MR_TYPE_DMA)
128 		goto post_cmd;
129 
130 	if (mr->type == ERDMA_MR_TYPE_NORMAL) {
131 		req.start_va = mr->mem.va;
132 		req.size = mr->mem.len;
133 	}
134 
135 	if (mr->type == ERDMA_MR_TYPE_FRMR ||
136 	    mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) {
137 		phy_addr = req.phy_addr;
138 		*phy_addr = mr->mem.mtt_entry[0];
139 	} else {
140 		phy_addr = req.phy_addr;
141 		for (i = 0; i < mr->mem.mtt_nents; i++)
142 			*phy_addr++ = mr->mem.mtt_entry[i];
143 	}
144 
145 post_cmd:
146 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
147 }
148 
149 static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq)
150 {
151 	struct erdma_cmdq_create_cq_req req;
152 	u32 page_size;
153 	struct erdma_mem *mtt;
154 
155 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
156 				CMDQ_OPCODE_CREATE_CQ);
157 
158 	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
159 		   FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
160 	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
161 
162 	if (rdma_is_kernel_res(&cq->ibcq.res)) {
163 		page_size = SZ_32M;
164 		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
165 				       ilog2(page_size) - ERDMA_HW_PAGE_SHIFT);
166 		req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
167 		req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
168 
169 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
170 			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
171 				       ERDMA_MR_INLINE_MTT);
172 
173 		req.first_page_offset = 0;
174 		req.cq_db_info_addr =
175 			cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
176 	} else {
177 		mtt = &cq->user_cq.qbuf_mtt;
178 		req.cfg0 |=
179 			FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
180 				   ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT);
181 		if (mtt->mtt_nents == 1) {
182 			req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf);
183 			req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf);
184 		} else {
185 			req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]);
186 			req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]);
187 		}
188 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
189 				       mtt->mtt_nents);
190 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
191 				       mtt->mtt_type);
192 
193 		req.first_page_offset = mtt->page_offset;
194 		req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
195 	}
196 
197 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
198 }
199 
200 static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
201 {
202 	int idx;
203 	unsigned long flags;
204 
205 	spin_lock_irqsave(&res_cb->lock, flags);
206 	idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
207 				 res_cb->next_alloc_idx);
208 	if (idx == res_cb->max_cap) {
209 		idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
210 		if (idx == res_cb->max_cap) {
211 			res_cb->next_alloc_idx = 1;
212 			spin_unlock_irqrestore(&res_cb->lock, flags);
213 			return -ENOSPC;
214 		}
215 	}
216 
217 	set_bit(idx, res_cb->bitmap);
218 	res_cb->next_alloc_idx = idx + 1;
219 	spin_unlock_irqrestore(&res_cb->lock, flags);
220 
221 	return idx;
222 }
223 
224 static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
225 {
226 	unsigned long flags;
227 	u32 used;
228 
229 	spin_lock_irqsave(&res_cb->lock, flags);
230 	used = __test_and_clear_bit(idx, res_cb->bitmap);
231 	spin_unlock_irqrestore(&res_cb->lock, flags);
232 	WARN_ON(!used);
233 }
234 
235 static struct rdma_user_mmap_entry *
236 erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
237 			     u32 size, u8 mmap_flag, u64 *mmap_offset)
238 {
239 	struct erdma_user_mmap_entry *entry =
240 		kzalloc(sizeof(*entry), GFP_KERNEL);
241 	int ret;
242 
243 	if (!entry)
244 		return NULL;
245 
246 	entry->address = (u64)address;
247 	entry->mmap_flag = mmap_flag;
248 
249 	size = PAGE_ALIGN(size);
250 
251 	ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
252 					  size);
253 	if (ret) {
254 		kfree(entry);
255 		return NULL;
256 	}
257 
258 	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
259 
260 	return &entry->rdma_entry;
261 }
262 
263 int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
264 		       struct ib_udata *unused)
265 {
266 	struct erdma_dev *dev = to_edev(ibdev);
267 
268 	memset(attr, 0, sizeof(*attr));
269 
270 	attr->max_mr_size = dev->attrs.max_mr_size;
271 	attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
272 	attr->vendor_part_id = dev->pdev->device;
273 	attr->hw_ver = dev->pdev->revision;
274 	attr->max_qp = dev->attrs.max_qp - 1;
275 	attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
276 	attr->max_qp_rd_atom = dev->attrs.max_ord;
277 	attr->max_qp_init_rd_atom = dev->attrs.max_ird;
278 	attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
279 	attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
280 	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
281 	ibdev->local_dma_lkey = dev->attrs.local_dma_key;
282 	attr->max_send_sge = dev->attrs.max_send_sge;
283 	attr->max_recv_sge = dev->attrs.max_recv_sge;
284 	attr->max_sge_rd = dev->attrs.max_sge_rd;
285 	attr->max_cq = dev->attrs.max_cq - 1;
286 	attr->max_cqe = dev->attrs.max_cqe;
287 	attr->max_mr = dev->attrs.max_mr;
288 	attr->max_pd = dev->attrs.max_pd;
289 	attr->max_mw = dev->attrs.max_mw;
290 	attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
291 	attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
292 
293 	if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
294 		attr->atomic_cap = IB_ATOMIC_GLOB;
295 
296 	attr->fw_ver = dev->attrs.fw_version;
297 
298 	if (dev->netdev)
299 		addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
300 				    dev->netdev->dev_addr);
301 
302 	return 0;
303 }
304 
305 int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
306 		    union ib_gid *gid)
307 {
308 	struct erdma_dev *dev = to_edev(ibdev);
309 
310 	memset(gid, 0, sizeof(*gid));
311 	ether_addr_copy(gid->raw, dev->attrs.peer_addr);
312 
313 	return 0;
314 }
315 
316 int erdma_query_port(struct ib_device *ibdev, u32 port,
317 		     struct ib_port_attr *attr)
318 {
319 	struct erdma_dev *dev = to_edev(ibdev);
320 	struct net_device *ndev = dev->netdev;
321 
322 	memset(attr, 0, sizeof(*attr));
323 
324 	attr->gid_tbl_len = 1;
325 	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
326 	attr->max_msg_sz = -1;
327 
328 	if (!ndev)
329 		goto out;
330 
331 	ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
332 	attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
333 	attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
334 	if (netif_running(ndev) && netif_carrier_ok(ndev))
335 		dev->state = IB_PORT_ACTIVE;
336 	else
337 		dev->state = IB_PORT_DOWN;
338 	attr->state = dev->state;
339 
340 out:
341 	if (dev->state == IB_PORT_ACTIVE)
342 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
343 	else
344 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
345 
346 	return 0;
347 }
348 
349 int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
350 			     struct ib_port_immutable *port_immutable)
351 {
352 	port_immutable->gid_tbl_len = 1;
353 	port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
354 
355 	return 0;
356 }
357 
358 int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
359 {
360 	struct erdma_pd *pd = to_epd(ibpd);
361 	struct erdma_dev *dev = to_edev(ibpd->device);
362 	int pdn;
363 
364 	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
365 	if (pdn < 0)
366 		return pdn;
367 
368 	pd->pdn = pdn;
369 
370 	return 0;
371 }
372 
373 int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
374 {
375 	struct erdma_pd *pd = to_epd(ibpd);
376 	struct erdma_dev *dev = to_edev(ibpd->device);
377 
378 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
379 
380 	return 0;
381 }
382 
383 static void erdma_flush_worker(struct work_struct *work)
384 {
385 	struct delayed_work *dwork = to_delayed_work(work);
386 	struct erdma_qp *qp =
387 		container_of(dwork, struct erdma_qp, reflush_dwork);
388 	struct erdma_cmdq_reflush_req req;
389 
390 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
391 				CMDQ_OPCODE_REFLUSH);
392 	req.qpn = QP_ID(qp);
393 	req.sq_pi = qp->kern_qp.sq_pi;
394 	req.rq_pi = qp->kern_qp.rq_pi;
395 	erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
396 }
397 
398 static int erdma_qp_validate_cap(struct erdma_dev *dev,
399 				 struct ib_qp_init_attr *attrs)
400 {
401 	if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
402 	    (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
403 	    (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
404 	    (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
405 	    (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
406 	    !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
407 		return -EINVAL;
408 	}
409 
410 	return 0;
411 }
412 
413 static int erdma_qp_validate_attr(struct erdma_dev *dev,
414 				  struct ib_qp_init_attr *attrs)
415 {
416 	if (attrs->qp_type != IB_QPT_RC)
417 		return -EOPNOTSUPP;
418 
419 	if (attrs->srq)
420 		return -EOPNOTSUPP;
421 
422 	if (!attrs->send_cq || !attrs->recv_cq)
423 		return -EOPNOTSUPP;
424 
425 	return 0;
426 }
427 
428 static void free_kernel_qp(struct erdma_qp *qp)
429 {
430 	struct erdma_dev *dev = qp->dev;
431 
432 	vfree(qp->kern_qp.swr_tbl);
433 	vfree(qp->kern_qp.rwr_tbl);
434 
435 	if (qp->kern_qp.sq_buf)
436 		dma_free_coherent(
437 			&dev->pdev->dev,
438 			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
439 			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
440 
441 	if (qp->kern_qp.rq_buf)
442 		dma_free_coherent(
443 			&dev->pdev->dev,
444 			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
445 			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
446 }
447 
448 static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
449 			  struct ib_qp_init_attr *attrs)
450 {
451 	struct erdma_kqp *kqp = &qp->kern_qp;
452 	int size;
453 
454 	if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
455 		kqp->sig_all = 1;
456 
457 	kqp->sq_pi = 0;
458 	kqp->sq_ci = 0;
459 	kqp->rq_pi = 0;
460 	kqp->rq_ci = 0;
461 	kqp->hw_sq_db =
462 		dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
463 	kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
464 
465 	kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64));
466 	kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64));
467 	if (!kqp->swr_tbl || !kqp->rwr_tbl)
468 		goto err_out;
469 
470 	size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
471 	kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
472 					 &kqp->sq_buf_dma_addr, GFP_KERNEL);
473 	if (!kqp->sq_buf)
474 		goto err_out;
475 
476 	size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
477 	kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
478 					 &kqp->rq_buf_dma_addr, GFP_KERNEL);
479 	if (!kqp->rq_buf)
480 		goto err_out;
481 
482 	kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
483 	kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);
484 
485 	return 0;
486 
487 err_out:
488 	free_kernel_qp(qp);
489 	return -ENOMEM;
490 }
491 
492 static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
493 			   u64 start, u64 len, int access, u64 virt,
494 			   unsigned long req_page_size, u8 force_indirect_mtt)
495 {
496 	struct ib_block_iter biter;
497 	uint64_t *phy_addr = NULL;
498 	int ret = 0;
499 
500 	mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
501 	if (IS_ERR(mem->umem)) {
502 		ret = PTR_ERR(mem->umem);
503 		mem->umem = NULL;
504 		return ret;
505 	}
506 
507 	mem->va = virt;
508 	mem->len = len;
509 	mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
510 	mem->page_offset = start & (mem->page_size - 1);
511 	mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
512 	mem->page_cnt = mem->mtt_nents;
513 
514 	if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES ||
515 	    force_indirect_mtt) {
516 		mem->mtt_type = ERDMA_MR_INDIRECT_MTT;
517 		mem->mtt_buf =
518 			alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL);
519 		if (!mem->mtt_buf) {
520 			ret = -ENOMEM;
521 			goto error_ret;
522 		}
523 		phy_addr = mem->mtt_buf;
524 	} else {
525 		mem->mtt_type = ERDMA_MR_INLINE_MTT;
526 		phy_addr = mem->mtt_entry;
527 	}
528 
529 	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) {
530 		*phy_addr = rdma_block_iter_dma_address(&biter);
531 		phy_addr++;
532 	}
533 
534 	if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) {
535 		mem->mtt_entry[0] =
536 			dma_map_single(&dev->pdev->dev, mem->mtt_buf,
537 				       MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
538 		if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) {
539 			free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
540 			mem->mtt_buf = NULL;
541 			ret = -ENOMEM;
542 			goto error_ret;
543 		}
544 	}
545 
546 	return 0;
547 
548 error_ret:
549 	if (mem->umem) {
550 		ib_umem_release(mem->umem);
551 		mem->umem = NULL;
552 	}
553 
554 	return ret;
555 }
556 
557 static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
558 {
559 	if (mem->mtt_buf) {
560 		dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0],
561 				 MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
562 		free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
563 	}
564 
565 	if (mem->umem) {
566 		ib_umem_release(mem->umem);
567 		mem->umem = NULL;
568 	}
569 }
570 
571 static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
572 				    u64 dbrecords_va,
573 				    struct erdma_user_dbrecords_page **dbr_page,
574 				    dma_addr_t *dma_addr)
575 {
576 	struct erdma_user_dbrecords_page *page = NULL;
577 	int rv = 0;
578 
579 	mutex_lock(&ctx->dbrecords_page_mutex);
580 
581 	list_for_each_entry(page, &ctx->dbrecords_page_list, list)
582 		if (page->va == (dbrecords_va & PAGE_MASK))
583 			goto found;
584 
585 	page = kmalloc(sizeof(*page), GFP_KERNEL);
586 	if (!page) {
587 		rv = -ENOMEM;
588 		goto out;
589 	}
590 
591 	page->va = (dbrecords_va & PAGE_MASK);
592 	page->refcnt = 0;
593 
594 	page->umem = ib_umem_get(ctx->ibucontext.device,
595 				 dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
596 	if (IS_ERR(page->umem)) {
597 		rv = PTR_ERR(page->umem);
598 		kfree(page);
599 		goto out;
600 	}
601 
602 	list_add(&page->list, &ctx->dbrecords_page_list);
603 
604 found:
605 	*dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
606 		    (dbrecords_va & ~PAGE_MASK);
607 	*dbr_page = page;
608 	page->refcnt++;
609 
610 out:
611 	mutex_unlock(&ctx->dbrecords_page_mutex);
612 	return rv;
613 }
614 
615 static void
616 erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
617 			   struct erdma_user_dbrecords_page **dbr_page)
618 {
619 	if (!ctx || !(*dbr_page))
620 		return;
621 
622 	mutex_lock(&ctx->dbrecords_page_mutex);
623 	if (--(*dbr_page)->refcnt == 0) {
624 		list_del(&(*dbr_page)->list);
625 		ib_umem_release((*dbr_page)->umem);
626 		kfree(*dbr_page);
627 	}
628 
629 	*dbr_page = NULL;
630 	mutex_unlock(&ctx->dbrecords_page_mutex);
631 }
632 
633 static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
634 			u64 va, u32 len, u64 db_info_va)
635 {
636 	dma_addr_t db_info_dma_addr;
637 	u32 rq_offset;
638 	int ret;
639 
640 	if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) +
641 		   qp->attrs.rq_size * RQE_SIZE))
642 		return -EINVAL;
643 
644 	ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va,
645 			      qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
646 			      (SZ_1M - SZ_4K), 1);
647 	if (ret)
648 		return ret;
649 
650 	rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
651 	qp->user_qp.rq_offset = rq_offset;
652 
653 	ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset,
654 			      qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
655 			      (SZ_1M - SZ_4K), 1);
656 	if (ret)
657 		goto put_sq_mtt;
658 
659 	ret = erdma_map_user_dbrecords(uctx, db_info_va,
660 				       &qp->user_qp.user_dbr_page,
661 				       &db_info_dma_addr);
662 	if (ret)
663 		goto put_rq_mtt;
664 
665 	qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
666 	qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;
667 
668 	return 0;
669 
670 put_rq_mtt:
671 	put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
672 
673 put_sq_mtt:
674 	put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
675 
676 	return ret;
677 }
678 
679 static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
680 {
681 	put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
682 	put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
683 	erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
684 }
685 
686 int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
687 		    struct ib_udata *udata)
688 {
689 	struct erdma_qp *qp = to_eqp(ibqp);
690 	struct erdma_dev *dev = to_edev(ibqp->device);
691 	struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
692 		udata, struct erdma_ucontext, ibucontext);
693 	struct erdma_ureq_create_qp ureq;
694 	struct erdma_uresp_create_qp uresp;
695 	int ret;
696 
697 	ret = erdma_qp_validate_cap(dev, attrs);
698 	if (ret)
699 		goto err_out;
700 
701 	ret = erdma_qp_validate_attr(dev, attrs);
702 	if (ret)
703 		goto err_out;
704 
705 	qp->scq = to_ecq(attrs->send_cq);
706 	qp->rcq = to_ecq(attrs->recv_cq);
707 	qp->dev = dev;
708 	qp->attrs.cc = dev->attrs.cc;
709 
710 	init_rwsem(&qp->state_lock);
711 	kref_init(&qp->ref);
712 	init_completion(&qp->safe_free);
713 
714 	ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
715 			      XA_LIMIT(1, dev->attrs.max_qp - 1),
716 			      &dev->next_alloc_qpn, GFP_KERNEL);
717 	if (ret < 0) {
718 		ret = -ENOMEM;
719 		goto err_out;
720 	}
721 
722 	qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
723 					       ERDMA_MAX_WQEBB_PER_SQE);
724 	qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
725 
726 	if (uctx) {
727 		ret = ib_copy_from_udata(&ureq, udata,
728 					 min(sizeof(ureq), udata->inlen));
729 		if (ret)
730 			goto err_out_xa;
731 
732 		ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
733 				   ureq.db_record_va);
734 		if (ret)
735 			goto err_out_xa;
736 
737 		memset(&uresp, 0, sizeof(uresp));
738 
739 		uresp.num_sqe = qp->attrs.sq_size;
740 		uresp.num_rqe = qp->attrs.rq_size;
741 		uresp.qp_id = QP_ID(qp);
742 		uresp.rq_offset = qp->user_qp.rq_offset;
743 
744 		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
745 		if (ret)
746 			goto err_out_cmd;
747 	} else {
748 		init_kernel_qp(dev, qp, attrs);
749 	}
750 
751 	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
752 	qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
753 	qp->attrs.state = ERDMA_QP_STATE_IDLE;
754 	INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
755 
756 	ret = create_qp_cmd(dev, qp);
757 	if (ret)
758 		goto err_out_cmd;
759 
760 	spin_lock_init(&qp->lock);
761 
762 	return 0;
763 
764 err_out_cmd:
765 	if (uctx)
766 		free_user_qp(qp, uctx);
767 	else
768 		free_kernel_qp(qp);
769 err_out_xa:
770 	xa_erase(&dev->qp_xa, QP_ID(qp));
771 err_out:
772 	return ret;
773 }
774 
775 static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
776 {
777 	int stag_idx;
778 
779 	stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
780 	if (stag_idx < 0)
781 		return stag_idx;
782 
783 	/* For now, we always let key field be zero. */
784 	*stag = (stag_idx << 8);
785 
786 	return 0;
787 }
788 
789 struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
790 {
791 	struct erdma_dev *dev = to_edev(ibpd->device);
792 	struct erdma_mr *mr;
793 	u32 stag;
794 	int ret;
795 
796 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
797 	if (!mr)
798 		return ERR_PTR(-ENOMEM);
799 
800 	ret = erdma_create_stag(dev, &stag);
801 	if (ret)
802 		goto out_free;
803 
804 	mr->type = ERDMA_MR_TYPE_DMA;
805 
806 	mr->ibmr.lkey = stag;
807 	mr->ibmr.rkey = stag;
808 	mr->ibmr.pd = ibpd;
809 	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
810 	ret = regmr_cmd(dev, mr);
811 	if (ret)
812 		goto out_remove_stag;
813 
814 	return &mr->ibmr;
815 
816 out_remove_stag:
817 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
818 		       mr->ibmr.lkey >> 8);
819 
820 out_free:
821 	kfree(mr);
822 
823 	return ERR_PTR(ret);
824 }
825 
826 struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
827 				u32 max_num_sg)
828 {
829 	struct erdma_mr *mr;
830 	struct erdma_dev *dev = to_edev(ibpd->device);
831 	int ret;
832 	u32 stag;
833 
834 	if (mr_type != IB_MR_TYPE_MEM_REG)
835 		return ERR_PTR(-EOPNOTSUPP);
836 
837 	if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
838 		return ERR_PTR(-EINVAL);
839 
840 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
841 	if (!mr)
842 		return ERR_PTR(-ENOMEM);
843 
844 	ret = erdma_create_stag(dev, &stag);
845 	if (ret)
846 		goto out_free;
847 
848 	mr->type = ERDMA_MR_TYPE_FRMR;
849 
850 	mr->ibmr.lkey = stag;
851 	mr->ibmr.rkey = stag;
852 	mr->ibmr.pd = ibpd;
853 	/* update it in FRMR. */
854 	mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
855 		     ERDMA_MR_ACC_RW;
856 
857 	mr->mem.page_size = PAGE_SIZE; /* update it later. */
858 	mr->mem.page_cnt = max_num_sg;
859 	mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT;
860 	mr->mem.mtt_buf =
861 		alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL);
862 	if (!mr->mem.mtt_buf) {
863 		ret = -ENOMEM;
864 		goto out_remove_stag;
865 	}
866 
867 	mr->mem.mtt_entry[0] =
868 		dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf,
869 			       MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
870 	if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) {
871 		ret = -ENOMEM;
872 		goto out_free_mtt;
873 	}
874 
875 	ret = regmr_cmd(dev, mr);
876 	if (ret)
877 		goto out_dma_unmap;
878 
879 	return &mr->ibmr;
880 
881 out_dma_unmap:
882 	dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0],
883 			 MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
884 out_free_mtt:
885 	free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt));
886 
887 out_remove_stag:
888 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
889 		       mr->ibmr.lkey >> 8);
890 
891 out_free:
892 	kfree(mr);
893 
894 	return ERR_PTR(ret);
895 }
896 
897 static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
898 {
899 	struct erdma_mr *mr = to_emr(ibmr);
900 
901 	if (mr->mem.mtt_nents >= mr->mem.page_cnt)
902 		return -1;
903 
904 	*((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr;
905 	mr->mem.mtt_nents++;
906 
907 	return 0;
908 }
909 
910 int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
911 		    unsigned int *sg_offset)
912 {
913 	struct erdma_mr *mr = to_emr(ibmr);
914 	int num;
915 
916 	mr->mem.mtt_nents = 0;
917 
918 	num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
919 			     erdma_set_page);
920 
921 	return num;
922 }
923 
924 struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
925 				u64 virt, int access, struct ib_udata *udata)
926 {
927 	struct erdma_mr *mr = NULL;
928 	struct erdma_dev *dev = to_edev(ibpd->device);
929 	u32 stag;
930 	int ret;
931 
932 	if (!len || len > dev->attrs.max_mr_size)
933 		return ERR_PTR(-EINVAL);
934 
935 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
936 	if (!mr)
937 		return ERR_PTR(-ENOMEM);
938 
939 	ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
940 			      SZ_2G - SZ_4K, 0);
941 	if (ret)
942 		goto err_out_free;
943 
944 	ret = erdma_create_stag(dev, &stag);
945 	if (ret)
946 		goto err_out_put_mtt;
947 
948 	mr->ibmr.lkey = mr->ibmr.rkey = stag;
949 	mr->ibmr.pd = ibpd;
950 	mr->mem.va = virt;
951 	mr->mem.len = len;
952 	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
953 	mr->valid = 1;
954 	mr->type = ERDMA_MR_TYPE_NORMAL;
955 
956 	ret = regmr_cmd(dev, mr);
957 	if (ret)
958 		goto err_out_mr;
959 
960 	return &mr->ibmr;
961 
962 err_out_mr:
963 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
964 		       mr->ibmr.lkey >> 8);
965 
966 err_out_put_mtt:
967 	put_mtt_entries(dev, &mr->mem);
968 
969 err_out_free:
970 	kfree(mr);
971 
972 	return ERR_PTR(ret);
973 }
974 
975 int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
976 {
977 	struct erdma_mr *mr;
978 	struct erdma_dev *dev = to_edev(ibmr->device);
979 	struct erdma_cmdq_dereg_mr_req req;
980 	int ret;
981 
982 	mr = to_emr(ibmr);
983 
984 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
985 				CMDQ_OPCODE_DEREG_MR);
986 
987 	req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
988 		  FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
989 
990 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
991 	if (ret)
992 		return ret;
993 
994 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
995 
996 	put_mtt_entries(dev, &mr->mem);
997 
998 	kfree(mr);
999 	return 0;
1000 }
1001 
1002 int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1003 {
1004 	struct erdma_cq *cq = to_ecq(ibcq);
1005 	struct erdma_dev *dev = to_edev(ibcq->device);
1006 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1007 		udata, struct erdma_ucontext, ibucontext);
1008 	int err;
1009 	struct erdma_cmdq_destroy_cq_req req;
1010 
1011 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1012 				CMDQ_OPCODE_DESTROY_CQ);
1013 	req.cqn = cq->cqn;
1014 
1015 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1016 	if (err)
1017 		return err;
1018 
1019 	if (rdma_is_kernel_res(&cq->ibcq.res)) {
1020 		dma_free_coherent(&dev->pdev->dev,
1021 				  WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1022 				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1023 	} else {
1024 		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1025 		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1026 	}
1027 
1028 	xa_erase(&dev->cq_xa, cq->cqn);
1029 
1030 	return 0;
1031 }
1032 
1033 int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1034 {
1035 	struct erdma_qp *qp = to_eqp(ibqp);
1036 	struct erdma_dev *dev = to_edev(ibqp->device);
1037 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1038 		udata, struct erdma_ucontext, ibucontext);
1039 	struct erdma_qp_attrs qp_attrs;
1040 	int err;
1041 	struct erdma_cmdq_destroy_qp_req req;
1042 
1043 	down_write(&qp->state_lock);
1044 	qp_attrs.state = ERDMA_QP_STATE_ERROR;
1045 	erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
1046 	up_write(&qp->state_lock);
1047 
1048 	cancel_delayed_work_sync(&qp->reflush_dwork);
1049 
1050 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1051 				CMDQ_OPCODE_DESTROY_QP);
1052 	req.qpn = QP_ID(qp);
1053 
1054 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1055 	if (err)
1056 		return err;
1057 
1058 	erdma_qp_put(qp);
1059 	wait_for_completion(&qp->safe_free);
1060 
1061 	if (rdma_is_kernel_res(&qp->ibqp.res)) {
1062 		vfree(qp->kern_qp.swr_tbl);
1063 		vfree(qp->kern_qp.rwr_tbl);
1064 		dma_free_coherent(
1065 			&dev->pdev->dev,
1066 			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
1067 			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
1068 		dma_free_coherent(
1069 			&dev->pdev->dev,
1070 			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
1071 			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
1072 	} else {
1073 		put_mtt_entries(dev, &qp->user_qp.sq_mtt);
1074 		put_mtt_entries(dev, &qp->user_qp.rq_mtt);
1075 		erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
1076 	}
1077 
1078 	if (qp->cep)
1079 		erdma_cep_put(qp->cep);
1080 	xa_erase(&dev->qp_xa, QP_ID(qp));
1081 
1082 	return 0;
1083 }
1084 
/* Verbs-level wrapper: forward to erdma_qp_get() for the QP behind @ibqp. */
void erdma_qp_get_ref(struct ib_qp *ibqp)
{
	struct erdma_qp *qp = to_eqp(ibqp);

	erdma_qp_get(qp);
}
1089 
/* Verbs-level wrapper: forward to erdma_qp_put() for the QP behind @ibqp. */
void erdma_qp_put_ref(struct ib_qp *ibqp)
{
	struct erdma_qp *qp = to_eqp(ibqp);

	erdma_qp_put(qp);
}
1094 
1095 int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
1096 {
1097 	struct rdma_user_mmap_entry *rdma_entry;
1098 	struct erdma_user_mmap_entry *entry;
1099 	pgprot_t prot;
1100 	int err;
1101 
1102 	rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
1103 	if (!rdma_entry)
1104 		return -EINVAL;
1105 
1106 	entry = to_emmap(rdma_entry);
1107 
1108 	switch (entry->mmap_flag) {
1109 	case ERDMA_MMAP_IO_NC:
1110 		/* map doorbell. */
1111 		prot = pgprot_device(vma->vm_page_prot);
1112 		break;
1113 	default:
1114 		err = -EINVAL;
1115 		goto put_entry;
1116 	}
1117 
1118 	err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
1119 				prot, rdma_entry);
1120 
1121 put_entry:
1122 	rdma_user_mmap_entry_put(rdma_entry);
1123 	return err;
1124 }
1125 
/* Free the driver mmap entry that embeds @rdma_entry. */
void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	kfree(to_emmap(rdma_entry));
}
1132 
1133 #define ERDMA_SDB_PAGE 0
1134 #define ERDMA_SDB_ENTRY 1
1135 #define ERDMA_SDB_SHARED 2
1136 
1137 static void alloc_db_resources(struct erdma_dev *dev,
1138 			       struct erdma_ucontext *ctx)
1139 {
1140 	u32 bitmap_idx;
1141 	struct erdma_devattr *attrs = &dev->attrs;
1142 
1143 	if (attrs->disable_dwqe)
1144 		goto alloc_normal_db;
1145 
1146 	/* Try to alloc independent SDB page. */
1147 	spin_lock(&dev->db_bitmap_lock);
1148 	bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages);
1149 	if (bitmap_idx != attrs->dwqe_pages) {
1150 		set_bit(bitmap_idx, dev->sdb_page);
1151 		spin_unlock(&dev->db_bitmap_lock);
1152 
1153 		ctx->sdb_type = ERDMA_SDB_PAGE;
1154 		ctx->sdb_idx = bitmap_idx;
1155 		ctx->sdb_page_idx = bitmap_idx;
1156 		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
1157 			   (bitmap_idx << PAGE_SHIFT);
1158 		ctx->sdb_page_off = 0;
1159 
1160 		return;
1161 	}
1162 
1163 	bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries);
1164 	if (bitmap_idx != attrs->dwqe_entries) {
1165 		set_bit(bitmap_idx, dev->sdb_entry);
1166 		spin_unlock(&dev->db_bitmap_lock);
1167 
1168 		ctx->sdb_type = ERDMA_SDB_ENTRY;
1169 		ctx->sdb_idx = bitmap_idx;
1170 		ctx->sdb_page_idx = attrs->dwqe_pages +
1171 				    bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
1172 		ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
1173 
1174 		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
1175 			   (ctx->sdb_page_idx << PAGE_SHIFT);
1176 
1177 		return;
1178 	}
1179 
1180 	spin_unlock(&dev->db_bitmap_lock);
1181 
1182 alloc_normal_db:
1183 	ctx->sdb_type = ERDMA_SDB_SHARED;
1184 	ctx->sdb_idx = 0;
1185 	ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX;
1186 	ctx->sdb_page_off = 0;
1187 
1188 	ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT);
1189 }
1190 
1191 static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
1192 {
1193 	rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
1194 	rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
1195 	rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
1196 }
1197 
1198 int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
1199 {
1200 	struct erdma_ucontext *ctx = to_ectx(ibctx);
1201 	struct erdma_dev *dev = to_edev(ibctx->device);
1202 	int ret;
1203 	struct erdma_uresp_alloc_ctx uresp = {};
1204 
1205 	if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
1206 		ret = -ENOMEM;
1207 		goto err_out;
1208 	}
1209 
1210 	INIT_LIST_HEAD(&ctx->dbrecords_page_list);
1211 	mutex_init(&ctx->dbrecords_page_mutex);
1212 
1213 	alloc_db_resources(dev, ctx);
1214 
1215 	ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
1216 	ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
1217 
1218 	if (udata->outlen < sizeof(uresp)) {
1219 		ret = -EINVAL;
1220 		goto err_out;
1221 	}
1222 
1223 	ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
1224 		ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
1225 	if (!ctx->sq_db_mmap_entry) {
1226 		ret = -ENOMEM;
1227 		goto err_out;
1228 	}
1229 
1230 	ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
1231 		ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
1232 	if (!ctx->rq_db_mmap_entry) {
1233 		ret = -EINVAL;
1234 		goto err_out;
1235 	}
1236 
1237 	ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
1238 		ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
1239 	if (!ctx->cq_db_mmap_entry) {
1240 		ret = -EINVAL;
1241 		goto err_out;
1242 	}
1243 
1244 	uresp.dev_id = dev->pdev->device;
1245 	uresp.sdb_type = ctx->sdb_type;
1246 	uresp.sdb_offset = ctx->sdb_page_off;
1247 
1248 	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1249 	if (ret)
1250 		goto err_out;
1251 
1252 	return 0;
1253 
1254 err_out:
1255 	erdma_uctx_user_mmap_entries_remove(ctx);
1256 	atomic_dec(&dev->num_ctx);
1257 	return ret;
1258 }
1259 
1260 void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
1261 {
1262 	struct erdma_ucontext *ctx = to_ectx(ibctx);
1263 	struct erdma_dev *dev = to_edev(ibctx->device);
1264 
1265 	spin_lock(&dev->db_bitmap_lock);
1266 	if (ctx->sdb_type == ERDMA_SDB_PAGE)
1267 		clear_bit(ctx->sdb_idx, dev->sdb_page);
1268 	else if (ctx->sdb_type == ERDMA_SDB_ENTRY)
1269 		clear_bit(ctx->sdb_idx, dev->sdb_entry);
1270 
1271 	erdma_uctx_user_mmap_entries_remove(ctx);
1272 
1273 	spin_unlock(&dev->db_bitmap_lock);
1274 
1275 	atomic_dec(&dev->num_ctx);
1276 }
1277 
1278 static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
1279 	[IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
1280 	[IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
1281 	[IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
1282 	[IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
1283 	[IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
1284 	[IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
1285 	[IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
1286 };
1287 
1288 int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1289 		    struct ib_udata *udata)
1290 {
1291 	struct erdma_qp_attrs new_attrs;
1292 	enum erdma_qp_attr_mask erdma_attr_mask = 0;
1293 	struct erdma_qp *qp = to_eqp(ibqp);
1294 	int ret = 0;
1295 
1296 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1297 		return -EOPNOTSUPP;
1298 
1299 	memset(&new_attrs, 0, sizeof(new_attrs));
1300 
1301 	if (attr_mask & IB_QP_STATE) {
1302 		new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
1303 
1304 		erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
1305 	}
1306 
1307 	down_write(&qp->state_lock);
1308 
1309 	ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
1310 
1311 	up_write(&qp->state_lock);
1312 
1313 	return ret;
1314 }
1315 
1316 int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
1317 		   int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
1318 {
1319 	struct erdma_qp *qp;
1320 	struct erdma_dev *dev;
1321 
1322 	if (ibqp && qp_attr && qp_init_attr) {
1323 		qp = to_eqp(ibqp);
1324 		dev = to_edev(ibqp->device);
1325 	} else {
1326 		return -EINVAL;
1327 	}
1328 
1329 	qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1330 	qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1331 
1332 	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
1333 	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
1334 	qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
1335 	qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
1336 
1337 	qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
1338 	qp_attr->max_rd_atomic = qp->attrs.irq_size;
1339 	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
1340 
1341 	qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
1342 				   IB_ACCESS_REMOTE_WRITE |
1343 				   IB_ACCESS_REMOTE_READ;
1344 
1345 	qp_init_attr->cap = qp_attr->cap;
1346 
1347 	return 0;
1348 }
1349 
1350 static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
1351 			      struct erdma_ureq_create_cq *ureq)
1352 {
1353 	int ret;
1354 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
1355 
1356 	ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va,
1357 			      ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
1358 			      1);
1359 	if (ret)
1360 		return ret;
1361 
1362 	ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
1363 				       &cq->user_cq.user_dbr_page,
1364 				       &cq->user_cq.db_info_dma_addr);
1365 	if (ret)
1366 		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1367 
1368 	return ret;
1369 }
1370 
1371 static int erdma_init_kernel_cq(struct erdma_cq *cq)
1372 {
1373 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
1374 
1375 	cq->kern_cq.qbuf =
1376 		dma_alloc_coherent(&dev->pdev->dev,
1377 				   WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1378 				   &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
1379 	if (!cq->kern_cq.qbuf)
1380 		return -ENOMEM;
1381 
1382 	cq->kern_cq.db_record =
1383 		(u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
1384 	spin_lock_init(&cq->kern_cq.lock);
1385 	/* use default cqdb addr */
1386 	cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
1387 
1388 	return 0;
1389 }
1390 
1391 int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1392 		    struct ib_udata *udata)
1393 {
1394 	struct erdma_cq *cq = to_ecq(ibcq);
1395 	struct erdma_dev *dev = to_edev(ibcq->device);
1396 	unsigned int depth = attr->cqe;
1397 	int ret;
1398 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1399 		udata, struct erdma_ucontext, ibucontext);
1400 
1401 	if (depth > dev->attrs.max_cqe)
1402 		return -EINVAL;
1403 
1404 	depth = roundup_pow_of_two(depth);
1405 	cq->ibcq.cqe = depth;
1406 	cq->depth = depth;
1407 	cq->assoc_eqn = attr->comp_vector + 1;
1408 
1409 	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
1410 			      XA_LIMIT(1, dev->attrs.max_cq - 1),
1411 			      &dev->next_alloc_cqn, GFP_KERNEL);
1412 	if (ret < 0)
1413 		return ret;
1414 
1415 	if (!rdma_is_kernel_res(&ibcq->res)) {
1416 		struct erdma_ureq_create_cq ureq;
1417 		struct erdma_uresp_create_cq uresp;
1418 
1419 		ret = ib_copy_from_udata(&ureq, udata,
1420 					 min(udata->inlen, sizeof(ureq)));
1421 		if (ret)
1422 			goto err_out_xa;
1423 
1424 		ret = erdma_init_user_cq(ctx, cq, &ureq);
1425 		if (ret)
1426 			goto err_out_xa;
1427 
1428 		uresp.cq_id = cq->cqn;
1429 		uresp.num_cqe = depth;
1430 
1431 		ret = ib_copy_to_udata(udata, &uresp,
1432 				       min(sizeof(uresp), udata->outlen));
1433 		if (ret)
1434 			goto err_free_res;
1435 	} else {
1436 		ret = erdma_init_kernel_cq(cq);
1437 		if (ret)
1438 			goto err_out_xa;
1439 	}
1440 
1441 	ret = create_cq_cmd(dev, cq);
1442 	if (ret)
1443 		goto err_free_res;
1444 
1445 	return 0;
1446 
1447 err_free_res:
1448 	if (!rdma_is_kernel_res(&ibcq->res)) {
1449 		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1450 		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1451 	} else {
1452 		dma_free_coherent(&dev->pdev->dev,
1453 				  WARPPED_BUFSIZE(depth << CQE_SHIFT),
1454 				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1455 	}
1456 
1457 err_out_xa:
1458 	xa_erase(&dev->cq_xa, cq->cqn);
1459 
1460 	return ret;
1461 }
1462 
1463 void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
1464 {
1465 	struct erdma_cmdq_config_mtu_req req;
1466 
1467 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
1468 				CMDQ_OPCODE_CONF_MTU);
1469 	req.mtu = mtu;
1470 
1471 	erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1472 }
1473 
1474 void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
1475 {
1476 	struct ib_event event;
1477 
1478 	event.device = &dev->ibdev;
1479 	event.element.port_num = 1;
1480 	event.event = reason;
1481 
1482 	ib_dispatch_event(&event);
1483 }
1484