// SPDX-License-Identifier: GPL-2.0

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

/* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */

#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <net/addrconf.h>
#include <rdma/erdma-abi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_hw.h"
#include "erdma_verbs.h"

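/*
 * Post a CREATE_QP command to the device command queue. Kernel QPs pass
 * the DMA addresses of their contiguous SQ/RQ buffers directly (inline
 * MTT, one entry per queue); user QPs pass the MTT layout gathered from
 * the pinned userspace buffers. On success the cookie returned in resp0
 * is stored in qp->attrs.cookie.
 */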
static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp)
{
	struct erdma_cmdq_create_qp_req req;
	struct erdma_pd *pd = to_epd(qp->ibqp.pd);
	struct erdma_uqp *user_qp;
	u64 resp0, resp1;
	int err;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_CREATE_QP);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
			      ilog2(qp->attrs.sq_size)) |
		   FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
			      ilog2(qp->attrs.rq_size)) |
		   FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);

	if (rdma_is_kernel_res(&qp->ibqp.res)) {
		u32 pgsz_range = ilog2(SZ_1M) - PAGE_SHIFT;

		req.sq_cqn_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
				   pgsz_range) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
		req.rq_cqn_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
				   pgsz_range) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);

		req.sq_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
				   ERDMA_MR_INLINE_MTT);
		req.rq_mtt_cfg = req.sq_mtt_cfg;

		req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
		req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
		req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
					  (qp->attrs.sq_size << SQEBB_SHIFT);
		req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
					  (qp->attrs.rq_size << RQE_SHIFT);
	} else {
		user_qp = &qp->user_qp;
		req.sq_cqn_mtt_cfg = FIELD_PREP(
			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
			ilog2(user_qp->sq_mtt.page_size) - PAGE_SHIFT);
		req.sq_cqn_mtt_cfg |=
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);

		req.rq_cqn_mtt_cfg = FIELD_PREP(
			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
			ilog2(user_qp->rq_mtt.page_size) - PAGE_SHIFT);
		req.rq_cqn_mtt_cfg |=
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);

		req.sq_mtt_cfg = user_qp->sq_mtt.page_offset;
		req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
					     user_qp->sq_mtt.mtt_nents) |
				  FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
					     user_qp->sq_mtt.mtt_type);

		req.rq_mtt_cfg = user_qp->rq_mtt.page_offset;
		req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
					     user_qp->rq_mtt.mtt_nents) |
				  FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
					     user_qp->rq_mtt.mtt_type);

		req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0];
		req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0];

		req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
		req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
	}

	err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), &resp0,
				  &resp1);
	if (!err)
		qp->attrs.cookie =
			FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);

	return err;
}

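/*
 * Post a REG_MR command describing the MR's key, PD, access rights and
 * page table. DMA MRs carry no page list at all; FRMRs and MRs with an
 * indirect MTT pass only the DMA address of the MTT buffer, while MRs
 * with an inline MTT copy the page addresses into the request itself.
 */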
static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
{
	struct erdma_cmdq_reg_mr_req req;
	struct erdma_pd *pd = to_epd(mr->ibmr.pd);
	u64 *phy_addr;
	int i;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
		   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
	req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
		   FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) |
		   FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0);
	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
			      ilog2(mr->mem.page_size)) |
		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);

	if (mr->type == ERDMA_MR_TYPE_DMA)
		goto post_cmd;

	if (mr->type == ERDMA_MR_TYPE_NORMAL) {
		req.start_va = mr->mem.va;
		req.size = mr->mem.len;
	}

	if (mr->type == ERDMA_MR_TYPE_FRMR ||
	    mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) {
		phy_addr = req.phy_addr;
		*phy_addr = mr->mem.mtt_entry[0];
	} else {
		phy_addr = req.phy_addr;
		for (i = 0; i < mr->mem.mtt_nents; i++)
			*phy_addr++ = mr->mem.mtt_entry[i];
	}

post_cmd:
	return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
				   NULL);
}

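/*
 * Post a CREATE_CQ command. A kernel CQ uses one contiguous coherent
 * buffer (inline MTT) with the doorbell record placed right after the
 * CQE array; a user CQ is described by the MTT built from its umem plus
 * a separately mapped doorbell record.
 */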
static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq)
{
	struct erdma_cmdq_create_cq_req req;
	u32 page_size;
	struct erdma_mem *mtt;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_CREATE_CQ);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
		   FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);

	if (rdma_is_kernel_res(&cq->ibcq.res)) {
		page_size = SZ_32M;
		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
				       ilog2(page_size) - PAGE_SHIFT);
		req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
		req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);

		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
				       ERDMA_MR_INLINE_MTT);

		req.first_page_offset = 0;
		req.cq_db_info_addr =
			cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
	} else {
		mtt = &cq->user_cq.qbuf_mtt;
		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
				       ilog2(mtt->page_size) - PAGE_SHIFT);
		if (mtt->mtt_nents == 1) {
			req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf);
			req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf);
		} else {
			req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]);
			req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]);
		}
		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
				       mtt->mtt_nents);
		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
				       mtt->mtt_type);

		req.first_page_offset = mtt->page_offset;
		req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
	}

	return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
				   NULL);
}

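/*
 * Round-robin index allocator over a bitmap, used for PD numbers and
 * STag indexes. The search starts at next_alloc_idx and wraps around
 * once before failing with -ENOSPC. Typical usage, as in erdma_alloc_pd()
 * below:
 *
 *	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
 *	if (pdn < 0)
 *		return pdn;
 *	...
 *	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pdn);
 */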
static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
{
	int idx;
	unsigned long flags;

	spin_lock_irqsave(&res_cb->lock, flags);
	idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
				 res_cb->next_alloc_idx);
	if (idx == res_cb->max_cap) {
		idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
		if (idx == res_cb->max_cap) {
			res_cb->next_alloc_idx = 1;
			spin_unlock_irqrestore(&res_cb->lock, flags);
			return -ENOSPC;
		}
	}

	set_bit(idx, res_cb->bitmap);
	res_cb->next_alloc_idx = idx + 1;
	spin_unlock_irqrestore(&res_cb->lock, flags);

	return idx;
}

static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
{
	unsigned long flags;
	u32 used;

	spin_lock_irqsave(&res_cb->lock, flags);
	used = __test_and_clear_bit(idx, res_cb->bitmap);
	spin_unlock_irqrestore(&res_cb->lock, flags);
	WARN_ON(!used);
}

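/*
 * Allocate an erdma mmap entry for the given doorbell address, register
 * it with the RDMA core and report the resulting mmap offset to the
 * caller. Returns NULL on any failure.
 */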
static struct rdma_user_mmap_entry *
erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
			     u32 size, u8 mmap_flag, u64 *mmap_offset)
{
	struct erdma_user_mmap_entry *entry =
		kzalloc(sizeof(*entry), GFP_KERNEL);
	int ret;

	if (!entry)
		return NULL;

	entry->address = (u64)address;
	entry->mmap_flag = mmap_flag;

	size = PAGE_ALIGN(size);

	ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
					  size);
	if (ret) {
		kfree(entry);
		return NULL;
	}

	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);

	return &entry->rdma_entry;
}

int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
		       struct ib_udata *unused)
{
	struct erdma_dev *dev = to_edev(ibdev);

	memset(attr, 0, sizeof(*attr));

	attr->max_mr_size = dev->attrs.max_mr_size;
	attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
	attr->vendor_part_id = dev->pdev->device;
	attr->hw_ver = dev->pdev->revision;
	attr->max_qp = dev->attrs.max_qp - 1;
	attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
	attr->max_qp_rd_atom = dev->attrs.max_ord;
	attr->max_qp_init_rd_atom = dev->attrs.max_ird;
	attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
	attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
	ibdev->local_dma_lkey = dev->attrs.local_dma_key;
	attr->max_send_sge = dev->attrs.max_send_sge;
	attr->max_recv_sge = dev->attrs.max_recv_sge;
	attr->max_sge_rd = dev->attrs.max_sge_rd;
	attr->max_cq = dev->attrs.max_cq - 1;
	attr->max_cqe = dev->attrs.max_cqe;
	attr->max_mr = dev->attrs.max_mr;
	attr->max_pd = dev->attrs.max_pd;
	attr->max_mw = dev->attrs.max_mw;
	attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
	attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
	attr->fw_ver = dev->attrs.fw_version;

	if (dev->netdev)
		addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
				    dev->netdev->dev_addr);

	return 0;
}

int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
		    union ib_gid *gid)
{
	struct erdma_dev *dev = to_edev(ibdev);

	memset(gid, 0, sizeof(*gid));
	ether_addr_copy(gid->raw, dev->attrs.peer_addr);

	return 0;
}

int erdma_query_port(struct ib_device *ibdev, u32 port,
		     struct ib_port_attr *attr)
{
	struct erdma_dev *dev = to_edev(ibdev);
	struct net_device *ndev = dev->netdev;

	memset(attr, 0, sizeof(*attr));

	attr->gid_tbl_len = 1;
	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
	attr->max_msg_sz = -1;

	if (!ndev)
		goto out;

	ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
	attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
	attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
	if (netif_running(ndev) && netif_carrier_ok(ndev))
		dev->state = IB_PORT_ACTIVE;
	else
		dev->state = IB_PORT_DOWN;
	attr->state = dev->state;

out:
	if (dev->state == IB_PORT_ACTIVE)
		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	else
		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

	return 0;
}

int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
			     struct ib_port_immutable *port_immutable)
{
	port_immutable->gid_tbl_len = 1;
	port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;

	return 0;
}

int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct erdma_pd *pd = to_epd(ibpd);
	struct erdma_dev *dev = to_edev(ibpd->device);
	int pdn;

	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
	if (pdn < 0)
		return pdn;

	pd->pdn = pdn;

	return 0;
}

int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct erdma_pd *pd = to_epd(ibpd);
	struct erdma_dev *dev = to_edev(ibpd->device);

	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);

	return 0;
}

static int erdma_qp_validate_cap(struct erdma_dev *dev,
				 struct ib_qp_init_attr *attrs)
{
	if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
	    (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
	    (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
	    (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
	    (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
	    !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
		return -EINVAL;
	}

	return 0;
}

static int erdma_qp_validate_attr(struct erdma_dev *dev,
				  struct ib_qp_init_attr *attrs)
{
	if (attrs->qp_type != IB_QPT_RC)
		return -EOPNOTSUPP;

	if (attrs->srq)
		return -EOPNOTSUPP;

	if (!attrs->send_cq || !attrs->recv_cq)
		return -EOPNOTSUPP;

	return 0;
}

static void free_kernel_qp(struct erdma_qp *qp)
{
	struct erdma_dev *dev = qp->dev;

	vfree(qp->kern_qp.swr_tbl);
	vfree(qp->kern_qp.rwr_tbl);

	if (qp->kern_qp.sq_buf)
		dma_free_coherent(
			&dev->pdev->dev,
			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);

	if (qp->kern_qp.rq_buf)
		dma_free_coherent(
			&dev->pdev->dev,
			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
}

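/*
 * Allocate the resources of an in-kernel QP: the work-request id tables
 * (vmalloc) and the SQ/RQ buffers (coherent DMA), with the doorbell
 * records living right after each queue. On failure, everything already
 * allocated is released through free_kernel_qp().
 */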
static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
			  struct ib_qp_init_attr *attrs)
{
	struct erdma_kqp *kqp = &qp->kern_qp;
	int size;

	if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
		kqp->sig_all = 1;

	kqp->sq_pi = 0;
	kqp->sq_ci = 0;
	kqp->rq_pi = 0;
	kqp->rq_ci = 0;
	kqp->hw_sq_db =
		dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
	kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;

	kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64));
	kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64));
	if (!kqp->swr_tbl || !kqp->rwr_tbl)
		goto err_out;

	size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
	kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
					 &kqp->sq_buf_dma_addr, GFP_KERNEL);
	if (!kqp->sq_buf)
		goto err_out;

	size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
	kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
					 &kqp->rq_buf_dma_addr, GFP_KERNEL);
	if (!kqp->rq_buf)
		goto err_out;

	kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
	kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);

	return 0;

err_out:
	free_kernel_qp(qp);
	return -ENOMEM;
}

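/*
 * Pin a userspace buffer and build its MTT. Buffers of up to
 * ERDMA_MAX_INLINE_MTT_ENTRIES pages keep the page addresses inline in
 * mem->mtt_entry[]; larger buffers (or callers forcing it) use an
 * indirect MTT buffer, which is itself DMA-mapped and referenced through
 * mem->mtt_entry[0].
 */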
static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
			   u64 start, u64 len, int access, u64 virt,
			   unsigned long req_page_size, u8 force_indirect_mtt)
{
	struct ib_block_iter biter;
	uint64_t *phy_addr = NULL;
	int ret = 0;

	mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
	if (IS_ERR(mem->umem)) {
		ret = PTR_ERR(mem->umem);
		mem->umem = NULL;
		return ret;
	}

	mem->va = virt;
	mem->len = len;
	mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
	mem->page_offset = start & (mem->page_size - 1);
	mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
	mem->page_cnt = mem->mtt_nents;

	if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES ||
	    force_indirect_mtt) {
		mem->mtt_type = ERDMA_MR_INDIRECT_MTT;
		mem->mtt_buf =
			alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL);
		if (!mem->mtt_buf) {
			ret = -ENOMEM;
			goto error_ret;
		}
		phy_addr = mem->mtt_buf;
	} else {
		mem->mtt_type = ERDMA_MR_INLINE_MTT;
		phy_addr = mem->mtt_entry;
	}

	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) {
		*phy_addr = rdma_block_iter_dma_address(&biter);
		phy_addr++;
	}

	if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) {
		mem->mtt_entry[0] =
			dma_map_single(&dev->pdev->dev, mem->mtt_buf,
				       MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
		if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) {
			free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
			mem->mtt_buf = NULL;
			ret = -ENOMEM;
			goto error_ret;
		}
	}

	return 0;

error_ret:
	if (mem->umem) {
		ib_umem_release(mem->umem);
		mem->umem = NULL;
	}

	return ret;
}

static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
{
	if (mem->mtt_buf) {
		dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0],
				 MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
		free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
	}

	if (mem->umem) {
		ib_umem_release(mem->umem);
		mem->umem = NULL;
	}
}

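/*
 * Doorbell records of one user context share whole pages. Look up the
 * page covering dbrecords_va in the per-context list, pin it on first
 * use, and return the DMA address of the record within that page. The
 * page is refcounted and dropped in erdma_unmap_user_dbrecords().
 */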
static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
				    u64 dbrecords_va,
				    struct erdma_user_dbrecords_page **dbr_page,
				    dma_addr_t *dma_addr)
{
	struct erdma_user_dbrecords_page *page = NULL;
	int rv = 0;

	mutex_lock(&ctx->dbrecords_page_mutex);

	list_for_each_entry(page, &ctx->dbrecords_page_list, list)
		if (page->va == (dbrecords_va & PAGE_MASK))
			goto found;

	page = kmalloc(sizeof(*page), GFP_KERNEL);
	if (!page) {
		rv = -ENOMEM;
		goto out;
	}

	page->va = (dbrecords_va & PAGE_MASK);
	page->refcnt = 0;

	page->umem = ib_umem_get(ctx->ibucontext.device,
				 dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
	if (IS_ERR(page->umem)) {
		rv = PTR_ERR(page->umem);
		kfree(page);
		goto out;
	}

	list_add(&page->list, &ctx->dbrecords_page_list);

found:
	*dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
		    (dbrecords_va & ~PAGE_MASK);
	*dbr_page = page;
	page->refcnt++;

out:
	mutex_unlock(&ctx->dbrecords_page_mutex);
	return rv;
}

static void
erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
			   struct erdma_user_dbrecords_page **dbr_page)
{
	if (!ctx || !(*dbr_page))
		return;

	mutex_lock(&ctx->dbrecords_page_mutex);
	if (--(*dbr_page)->refcnt == 0) {
		list_del(&(*dbr_page)->list);
		ib_umem_release((*dbr_page)->umem);
		kfree(*dbr_page);
	}

	*dbr_page = NULL;
	mutex_unlock(&ctx->dbrecords_page_mutex);
}

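/*
 * Set up a userspace QP. The queue buffer passed in by userspace holds
 * the SQ first and the RQ at the next page-aligned offset; each half
 * gets its own MTT, and the doorbell records are resolved through the
 * context's shared doorbell-record pages.
 */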
static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
			u64 va, u32 len, u64 db_info_va)
{
	dma_addr_t db_info_dma_addr;
	u32 rq_offset;
	int ret;

	if (len < (PAGE_ALIGN(qp->attrs.sq_size * SQEBB_SIZE) +
		   qp->attrs.rq_size * RQE_SIZE))
		return -EINVAL;

	ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va,
			      qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
			      (SZ_1M - SZ_4K), 1);
	if (ret)
		return ret;

	rq_offset = PAGE_ALIGN(qp->attrs.sq_size << SQEBB_SHIFT);
	qp->user_qp.rq_offset = rq_offset;

	ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset,
			      qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
			      (SZ_1M - SZ_4K), 1);
	if (ret)
		goto put_sq_mtt;

	ret = erdma_map_user_dbrecords(uctx, db_info_va,
				       &qp->user_qp.user_dbr_page,
				       &db_info_dma_addr);
	if (ret)
		goto put_rq_mtt;

	qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
	qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;

	return 0;

put_rq_mtt:
	put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);

put_sq_mtt:
	put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);

	return ret;
}

static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
{
	put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
	put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
	erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
}

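/*
 * .create_qp verb: validate the requested capabilities, reserve a QPN
 * from the xarray, round the SQ/RQ sizes up to powers of two, set up
 * either the user or the kernel queue buffers, and finally issue
 * CREATE_QP to the device.
 */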
int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
		    struct ib_udata *udata)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	struct erdma_dev *dev = to_edev(ibqp->device);
	struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	struct erdma_ureq_create_qp ureq;
	struct erdma_uresp_create_qp uresp;
	int ret;

	ret = erdma_qp_validate_cap(dev, attrs);
	if (ret)
		goto err_out;

	ret = erdma_qp_validate_attr(dev, attrs);
	if (ret)
		goto err_out;

	qp->scq = to_ecq(attrs->send_cq);
	qp->rcq = to_ecq(attrs->recv_cq);
	qp->dev = dev;
	qp->attrs.cc = dev->attrs.cc;

	init_rwsem(&qp->state_lock);
	kref_init(&qp->ref);
	init_completion(&qp->safe_free);

	ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
			      XA_LIMIT(1, dev->attrs.max_qp - 1),
			      &dev->next_alloc_qpn, GFP_KERNEL);
	if (ret < 0) {
		ret = -ENOMEM;
		goto err_out;
	}

	qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
					       ERDMA_MAX_WQEBB_PER_SQE);
	qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);

	if (uctx) {
		ret = ib_copy_from_udata(&ureq, udata,
					 min(sizeof(ureq), udata->inlen));
		if (ret)
			goto err_out_xa;

		ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
				   ureq.db_record_va);
		if (ret)
			goto err_out_xa;

		memset(&uresp, 0, sizeof(uresp));

		uresp.num_sqe = qp->attrs.sq_size;
		uresp.num_rqe = qp->attrs.rq_size;
		uresp.qp_id = QP_ID(qp);
		uresp.rq_offset = qp->user_qp.rq_offset;

		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
		if (ret)
			goto err_out_cmd;
	} else {
		/* init_kernel_qp() frees whatever it allocated on failure. */
		ret = init_kernel_qp(dev, qp, attrs);
		if (ret)
			goto err_out_xa;
	}

	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
	qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
	qp->attrs.state = ERDMA_QP_STATE_IDLE;

	ret = create_qp_cmd(dev, qp);
	if (ret)
		goto err_out_cmd;

	spin_lock_init(&qp->lock);

	return 0;

err_out_cmd:
	if (uctx)
		free_user_qp(qp, uctx);
	else
		free_kernel_qp(qp);
err_out_xa:
	xa_erase(&dev->qp_xa, QP_ID(qp));
err_out:
	return ret;
}

static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
{
	int stag_idx;

	stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
	if (stag_idx < 0)
		return stag_idx;

	/* For now, we always let key field be zero. */
	*stag = (stag_idx << 8);

	return 0;
}

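/* DMA MR: no page table is needed, only the access rights are set. */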
struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
{
	struct erdma_dev *dev = to_edev(ibpd->device);
	struct erdma_mr *mr;
	u32 stag;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto out_free;

	mr->type = ERDMA_MR_TYPE_DMA;

	mr->ibmr.lkey = stag;
	mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
	ret = regmr_cmd(dev, mr);
	if (ret)
		goto out_remove_stag;

	return &mr->ibmr;

out_remove_stag:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

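/*
 * Allocate a fast-register MR. The indirect MTT buffer is sized for
 * max_num_sg pages up front and DMA-mapped here; the actual page list
 * is filled in later by erdma_map_mr_sg()/erdma_set_page().
 */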
struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
				u32 max_num_sg)
{
	struct erdma_mr *mr;
	struct erdma_dev *dev = to_edev(ibpd->device);
	int ret;
	u32 stag;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EOPNOTSUPP);

	if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto out_free;

	mr->type = ERDMA_MR_TYPE_FRMR;

	mr->ibmr.lkey = stag;
	mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	/* update it in FRMR. */
	mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
		     ERDMA_MR_ACC_RW;

	mr->mem.page_size = PAGE_SIZE; /* update it later. */
	mr->mem.page_cnt = max_num_sg;
	mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT;
	mr->mem.mtt_buf =
		alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL);
	if (!mr->mem.mtt_buf) {
		ret = -ENOMEM;
		goto out_remove_stag;
	}

	mr->mem.mtt_entry[0] =
		dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf,
			       MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
	if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) {
		ret = -ENOMEM;
		goto out_free_mtt;
	}

	ret = regmr_cmd(dev, mr);
	if (ret)
		goto out_dma_unmap;

	return &mr->ibmr;

out_dma_unmap:
	dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0],
			 MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
out_free_mtt:
	free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt));

out_remove_stag:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct erdma_mr *mr = to_emr(ibmr);

	if (mr->mem.mtt_nents >= mr->mem.page_cnt)
		return -1;

	*((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr;
	mr->mem.mtt_nents++;

	return 0;
}

int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		    unsigned int *sg_offset)
{
	struct erdma_mr *mr = to_emr(ibmr);
	int num;

	mr->mem.mtt_nents = 0;

	num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
			     erdma_set_page);

	return num;
}

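/*
 * Register a userspace MR: pin the range and build its MTT (any page
 * size from 4K up to 1G may be picked by ib_umem_find_best_pgsz()),
 * allocate an STag and post REG_MR.
 */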
struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
				u64 virt, int access, struct ib_udata *udata)
{
	struct erdma_mr *mr = NULL;
	struct erdma_dev *dev = to_edev(ibpd->device);
	u32 stag;
	int ret;

	if (!len || len > dev->attrs.max_mr_size)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
			      SZ_2G - SZ_4K, 0);
	if (ret)
		goto err_out_free;

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto err_out_put_mtt;

	mr->ibmr.lkey = mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	mr->mem.va = virt;
	mr->mem.len = len;
	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
	mr->valid = 1;
	mr->type = ERDMA_MR_TYPE_NORMAL;

	ret = regmr_cmd(dev, mr);
	if (ret)
		goto err_out_mr;

	return &mr->ibmr;

err_out_mr:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

err_out_put_mtt:
	put_mtt_entries(dev, &mr->mem);

err_out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct erdma_mr *mr;
	struct erdma_dev *dev = to_edev(ibmr->device);
	struct erdma_cmdq_dereg_mr_req req;
	int ret;

	mr = to_emr(ibmr);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DEREG_MR);

	req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
		  FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);

	ret = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
				  NULL);
	if (ret)
		return ret;

	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);

	put_mtt_entries(dev, &mr->mem);

	kfree(mr);
	return 0;
}

int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct erdma_cq *cq = to_ecq(ibcq);
	struct erdma_dev *dev = to_edev(ibcq->device);
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	int err;
	struct erdma_cmdq_destroy_cq_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DESTROY_CQ);
	req.cqn = cq->cqn;

	err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
				  NULL);
	if (err)
		return err;

	if (rdma_is_kernel_res(&cq->ibcq.res)) {
		dma_free_coherent(&dev->pdev->dev,
				  WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
	} else {
		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
	}

	xa_erase(&dev->cq_xa, cq->cqn);

	return 0;
}

int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	struct erdma_dev *dev = to_edev(ibqp->device);
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	struct erdma_qp_attrs qp_attrs;
	int err;
	struct erdma_cmdq_destroy_qp_req req;

	down_write(&qp->state_lock);
	qp_attrs.state = ERDMA_QP_STATE_ERROR;
	erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
	up_write(&qp->state_lock);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DESTROY_QP);
	req.qpn = QP_ID(qp);

	err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
				  NULL);
	if (err)
		return err;

	erdma_qp_put(qp);
	wait_for_completion(&qp->safe_free);

	if (rdma_is_kernel_res(&qp->ibqp.res)) {
		vfree(qp->kern_qp.swr_tbl);
		vfree(qp->kern_qp.rwr_tbl);
		dma_free_coherent(
			&dev->pdev->dev,
			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
		dma_free_coherent(
			&dev->pdev->dev,
			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
	} else {
		put_mtt_entries(dev, &qp->user_qp.sq_mtt);
		put_mtt_entries(dev, &qp->user_qp.rq_mtt);
		erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
	}

	if (qp->cep)
		erdma_cep_put(qp->cep);
	xa_erase(&dev->qp_xa, QP_ID(qp));

	return 0;
}

void erdma_qp_get_ref(struct ib_qp *ibqp)
{
	erdma_qp_get(to_eqp(ibqp));
}

void erdma_qp_put_ref(struct ib_qp *ibqp)
{
	erdma_qp_put(to_eqp(ibqp));
}

int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct erdma_user_mmap_entry *entry;
	pgprot_t prot;
	int err;

	rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
	if (!rdma_entry)
		return -EINVAL;

	entry = to_emmap(rdma_entry);

	switch (entry->mmap_flag) {
	case ERDMA_MMAP_IO_NC:
		/* map doorbell. */
		prot = pgprot_device(vma->vm_page_prot);
		break;
	default:
		return -EINVAL;
	}

	err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
				prot, rdma_entry);

	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);

	kfree(entry);
}

#define ERDMA_SDB_PAGE 0
#define ERDMA_SDB_ENTRY 1
#define ERDMA_SDB_SHARED 2

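/*
 * Send doorbell allocation policy for a new user context, in order of
 * preference: a dedicated SDB page, then a direct-WQE entry within a
 * shared type-1 page, and finally the shared doorbell page as the
 * fallback. The result is reported to userspace through sdb_type and
 * sdb_offset in the alloc_ucontext response.
 */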
static void alloc_db_resources(struct erdma_dev *dev,
			       struct erdma_ucontext *ctx)
{
	u32 bitmap_idx;
	struct erdma_devattr *attrs = &dev->attrs;

	if (attrs->disable_dwqe)
		goto alloc_normal_db;

	/* Try to alloc independent SDB page. */
	spin_lock(&dev->db_bitmap_lock);
	bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages);
	if (bitmap_idx != attrs->dwqe_pages) {
		set_bit(bitmap_idx, dev->sdb_page);
		spin_unlock(&dev->db_bitmap_lock);

		ctx->sdb_type = ERDMA_SDB_PAGE;
		ctx->sdb_idx = bitmap_idx;
		ctx->sdb_page_idx = bitmap_idx;
		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
			   (bitmap_idx << PAGE_SHIFT);
		ctx->sdb_page_off = 0;

		return;
	}

	bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries);
	if (bitmap_idx != attrs->dwqe_entries) {
		set_bit(bitmap_idx, dev->sdb_entry);
		spin_unlock(&dev->db_bitmap_lock);

		ctx->sdb_type = ERDMA_SDB_ENTRY;
		ctx->sdb_idx = bitmap_idx;
		ctx->sdb_page_idx = attrs->dwqe_pages +
				    bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
		ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE;

		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
			   (ctx->sdb_page_idx << PAGE_SHIFT);

		return;
	}

	spin_unlock(&dev->db_bitmap_lock);

alloc_normal_db:
	ctx->sdb_type = ERDMA_SDB_SHARED;
	ctx->sdb_idx = 0;
	ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX;
	ctx->sdb_page_off = 0;

	ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT);
}

static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
{
	rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
	rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
	rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
}

int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
{
	struct erdma_ucontext *ctx = to_ectx(ibctx);
	struct erdma_dev *dev = to_edev(ibctx->device);
	int ret;
	struct erdma_uresp_alloc_ctx uresp = {};

	if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
		ret = -ENOMEM;
		goto err_out;
	}

	INIT_LIST_HEAD(&ctx->dbrecords_page_list);
	mutex_init(&ctx->dbrecords_page_mutex);

	alloc_db_resources(dev, ctx);

	ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
	ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;

	if (udata->outlen < sizeof(uresp)) {
		ret = -EINVAL;
		goto err_out;
	}

	ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
	if (!ctx->sq_db_mmap_entry) {
		ret = -ENOMEM;
		goto err_out;
	}

	ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
	if (!ctx->rq_db_mmap_entry) {
		ret = -EINVAL;
		goto err_out;
	}

	ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
	if (!ctx->cq_db_mmap_entry) {
		ret = -EINVAL;
		goto err_out;
	}

	uresp.dev_id = dev->pdev->device;
	uresp.sdb_type = ctx->sdb_type;
	uresp.sdb_offset = ctx->sdb_page_off;

	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (ret)
		goto err_out;

	return 0;

err_out:
	erdma_uctx_user_mmap_entries_remove(ctx);
	atomic_dec(&dev->num_ctx);
	return ret;
}

void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	struct erdma_ucontext *ctx = to_ectx(ibctx);
	struct erdma_dev *dev = to_edev(ibctx->device);

	spin_lock(&dev->db_bitmap_lock);
	if (ctx->sdb_type == ERDMA_SDB_PAGE)
		clear_bit(ctx->sdb_idx, dev->sdb_page);
	else if (ctx->sdb_type == ERDMA_SDB_ENTRY)
		clear_bit(ctx->sdb_idx, dev->sdb_entry);

	erdma_uctx_user_mmap_entries_remove(ctx);

	spin_unlock(&dev->db_bitmap_lock);

	atomic_dec(&dev->num_ctx);
}

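/* Mapping from IB verbs QP states to the QP states used internally. */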
static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
	[IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
	[IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
	[IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
	[IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
	[IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
	[IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
};

int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
		    struct ib_udata *udata)
{
	struct erdma_qp_attrs new_attrs;
	enum erdma_qp_attr_mask erdma_attr_mask = 0;
	struct erdma_qp *qp = to_eqp(ibqp);
	int ret = 0;

	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	memset(&new_attrs, 0, sizeof(new_attrs));

	if (attr_mask & IB_QP_STATE) {
		new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];

		erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
	}

	down_write(&qp->state_lock);

	ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);

	up_write(&qp->state_lock);

	return ret;
}

int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		   int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
	struct erdma_qp *qp;
	struct erdma_dev *dev;

	if (ibqp && qp_attr && qp_init_attr) {
		qp = to_eqp(ibqp);
		dev = to_edev(ibqp->device);
	} else {
		return -EINVAL;
	}

	qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
	qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;

	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
	qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
	qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;

	qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
	qp_attr->max_rd_atomic = qp->attrs.irq_size;
	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;

	qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ;

	qp_init_attr->cap = qp_attr->cap;

	return 0;
}

static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
			      struct erdma_ureq_create_cq *ureq)
{
	int ret;
	struct erdma_dev *dev = to_edev(cq->ibcq.device);

	ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va,
			      ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
			      1);
	if (ret)
		return ret;

	ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
				       &cq->user_cq.user_dbr_page,
				       &cq->user_cq.db_info_dma_addr);
	if (ret)
		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);

	return ret;
}

static int erdma_init_kernel_cq(struct erdma_cq *cq)
{
	struct erdma_dev *dev = to_edev(cq->ibcq.device);

	cq->kern_cq.qbuf =
		dma_alloc_coherent(&dev->pdev->dev,
				   WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
				   &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
	if (!cq->kern_cq.qbuf)
		return -ENOMEM;

	cq->kern_cq.db_record =
		(u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
	spin_lock_init(&cq->kern_cq.lock);
	/* use default cqdb addr */
	cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;

	return 0;
}

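/*
 * .create_cq verb: round the depth up to a power of two, bind the CQ to
 * the EQ selected by comp_vector, reserve a CQN, set up either the user
 * or the kernel queue buffer, and issue CREATE_CQ.
 */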
int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		    struct ib_udata *udata)
{
	struct erdma_cq *cq = to_ecq(ibcq);
	struct erdma_dev *dev = to_edev(ibcq->device);
	unsigned int depth = attr->cqe;
	int ret;
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);

	if (depth > dev->attrs.max_cqe)
		return -EINVAL;

	depth = roundup_pow_of_two(depth);
	cq->ibcq.cqe = depth;
	cq->depth = depth;
	cq->assoc_eqn = attr->comp_vector + 1;

	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
			      XA_LIMIT(1, dev->attrs.max_cq - 1),
			      &dev->next_alloc_cqn, GFP_KERNEL);
	if (ret < 0)
		return ret;

	if (!rdma_is_kernel_res(&ibcq->res)) {
		struct erdma_ureq_create_cq ureq;
		struct erdma_uresp_create_cq uresp;

		ret = ib_copy_from_udata(&ureq, udata,
					 min(udata->inlen, sizeof(ureq)));
		if (ret)
			goto err_out_xa;

		ret = erdma_init_user_cq(ctx, cq, &ureq);
		if (ret)
			goto err_out_xa;

		uresp.cq_id = cq->cqn;
		uresp.num_cqe = depth;

		ret = ib_copy_to_udata(udata, &uresp,
				       min(sizeof(uresp), udata->outlen));
		if (ret)
			goto err_free_res;
	} else {
		ret = erdma_init_kernel_cq(cq);
		if (ret)
			goto err_out_xa;
	}

	ret = create_cq_cmd(dev, cq);
	if (ret)
		goto err_free_res;

	return 0;

err_free_res:
	if (!rdma_is_kernel_res(&ibcq->res)) {
		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
	} else {
		dma_free_coherent(&dev->pdev->dev,
				  WARPPED_BUFSIZE(depth << CQE_SHIFT),
				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
	}

err_out_xa:
	xa_erase(&dev->cq_xa, cq->cqn);

	return ret;
}

void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
{
	struct ib_event event;

	event.device = &dev->ibdev;
	event.element.port_num = 1;
	event.event = reason;

	ib_dispatch_event(&event);
}