xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision b8d312aa)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 
/* Size in bytes of one SRQ WQE element. */
#define QEDR_SRQ_WQE_ELEM_SIZE	(sizeof(union rdma_srq_elm))
/* SGE limit per SRQ WQE, and that limit plus one. */
#define RDMA_MAX_SGE_PER_SRQ	4
#define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)

/* Shift a doorbell offset left by DB_PWM_ADDR_OFFSET_SHIFT. */
#define DB_ADDR_SHIFT(addr)	((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
60 
61 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
62 					size_t len)
63 {
64 	size_t min_len = min_t(size_t, len, udata->outlen);
65 
66 	return ib_copy_to_udata(udata, src, min_len);
67 }
68 
69 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
70 {
71 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
72 		return -EINVAL;
73 
74 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
75 	return 0;
76 }
77 
78 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
79 		      int index, union ib_gid *sgid)
80 {
81 	struct qedr_dev *dev = get_qedr_dev(ibdev);
82 
83 	memset(sgid->raw, 0, sizeof(sgid->raw));
84 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
85 
86 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
87 		 sgid->global.interface_id, sgid->global.subnet_prefix);
88 
89 	return 0;
90 }
91 
92 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
93 {
94 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
95 	struct qedr_device_attr *qattr = &dev->attr;
96 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
97 
98 	srq_attr->srq_limit = srq->srq_limit;
99 	srq_attr->max_wr = qattr->max_srq_wr;
100 	srq_attr->max_sge = qattr->max_sge;
101 
102 	return 0;
103 }
104 
105 int qedr_query_device(struct ib_device *ibdev,
106 		      struct ib_device_attr *attr, struct ib_udata *udata)
107 {
108 	struct qedr_dev *dev = get_qedr_dev(ibdev);
109 	struct qedr_device_attr *qattr = &dev->attr;
110 
111 	if (!dev->rdma_ctx) {
112 		DP_ERR(dev,
113 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
114 		       dev->rdma_ctx);
115 		return -EINVAL;
116 	}
117 
118 	memset(attr, 0, sizeof(*attr));
119 
120 	attr->fw_ver = qattr->fw_ver;
121 	attr->sys_image_guid = qattr->sys_image_guid;
122 	attr->max_mr_size = qattr->max_mr_size;
123 	attr->page_size_cap = qattr->page_size_caps;
124 	attr->vendor_id = qattr->vendor_id;
125 	attr->vendor_part_id = qattr->vendor_part_id;
126 	attr->hw_ver = qattr->hw_ver;
127 	attr->max_qp = qattr->max_qp;
128 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
129 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
130 	    IB_DEVICE_RC_RNR_NAK_GEN |
131 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
132 
133 	attr->max_send_sge = qattr->max_sge;
134 	attr->max_recv_sge = qattr->max_sge;
135 	attr->max_sge_rd = qattr->max_sge;
136 	attr->max_cq = qattr->max_cq;
137 	attr->max_cqe = qattr->max_cqe;
138 	attr->max_mr = qattr->max_mr;
139 	attr->max_mw = qattr->max_mw;
140 	attr->max_pd = qattr->max_pd;
141 	attr->atomic_cap = dev->atomic_cap;
142 	attr->max_fmr = qattr->max_fmr;
143 	attr->max_map_per_fmr = 16;
144 	attr->max_qp_init_rd_atom =
145 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
146 	attr->max_qp_rd_atom =
147 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
148 		attr->max_qp_init_rd_atom);
149 
150 	attr->max_srq = qattr->max_srq;
151 	attr->max_srq_sge = qattr->max_srq_sge;
152 	attr->max_srq_wr = qattr->max_srq_wr;
153 
154 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
155 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
156 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
157 	attr->max_ah = qattr->max_ah;
158 
159 	return 0;
160 }
161 
162 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
163 					    u8 *ib_width)
164 {
165 	switch (speed) {
166 	case 1000:
167 		*ib_speed = IB_SPEED_SDR;
168 		*ib_width = IB_WIDTH_1X;
169 		break;
170 	case 10000:
171 		*ib_speed = IB_SPEED_QDR;
172 		*ib_width = IB_WIDTH_1X;
173 		break;
174 
175 	case 20000:
176 		*ib_speed = IB_SPEED_DDR;
177 		*ib_width = IB_WIDTH_4X;
178 		break;
179 
180 	case 25000:
181 		*ib_speed = IB_SPEED_EDR;
182 		*ib_width = IB_WIDTH_1X;
183 		break;
184 
185 	case 40000:
186 		*ib_speed = IB_SPEED_QDR;
187 		*ib_width = IB_WIDTH_4X;
188 		break;
189 
190 	case 50000:
191 		*ib_speed = IB_SPEED_HDR;
192 		*ib_width = IB_WIDTH_1X;
193 		break;
194 
195 	case 100000:
196 		*ib_speed = IB_SPEED_EDR;
197 		*ib_width = IB_WIDTH_4X;
198 		break;
199 
200 	default:
201 		/* Unsupported */
202 		*ib_speed = IB_SPEED_SDR;
203 		*ib_width = IB_WIDTH_1X;
204 	}
205 }
206 
207 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
208 {
209 	struct qedr_dev *dev;
210 	struct qed_rdma_port *rdma_port;
211 
212 	dev = get_qedr_dev(ibdev);
213 
214 	if (!dev->rdma_ctx) {
215 		DP_ERR(dev, "rdma_ctx is NULL\n");
216 		return -EINVAL;
217 	}
218 
219 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
220 
221 	/* *attr being zeroed by the caller, avoid zeroing it here */
222 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
223 		attr->state = IB_PORT_ACTIVE;
224 		attr->phys_state = 5;
225 	} else {
226 		attr->state = IB_PORT_DOWN;
227 		attr->phys_state = 3;
228 	}
229 	attr->max_mtu = IB_MTU_4096;
230 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
231 	attr->lid = 0;
232 	attr->lmc = 0;
233 	attr->sm_lid = 0;
234 	attr->sm_sl = 0;
235 	attr->ip_gids = true;
236 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
237 		attr->gid_tbl_len = 1;
238 		attr->pkey_tbl_len = 1;
239 	} else {
240 		attr->gid_tbl_len = QEDR_MAX_SGID;
241 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
242 	}
243 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
244 	attr->qkey_viol_cntr = 0;
245 	get_link_speed_and_width(rdma_port->link_speed,
246 				 &attr->active_speed, &attr->active_width);
247 	attr->max_msg_sz = rdma_port->max_msg_size;
248 	attr->max_vl_num = 4;
249 
250 	return 0;
251 }
252 
253 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
254 		     struct ib_port_modify *props)
255 {
256 	return 0;
257 }
258 
259 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
260 			 unsigned long len)
261 {
262 	struct qedr_mm *mm;
263 
264 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
265 	if (!mm)
266 		return -ENOMEM;
267 
268 	mm->key.phy_addr = phy_addr;
269 	/* This function might be called with a length which is not a multiple
270 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
271 	 * forces this granularity by increasing the requested size if needed.
272 	 * When qedr_mmap is called, it will search the list with the updated
273 	 * length as a key. To prevent search failures, the length is rounded up
274 	 * in advance to PAGE_SIZE.
275 	 */
276 	mm->key.len = roundup(len, PAGE_SIZE);
277 	INIT_LIST_HEAD(&mm->entry);
278 
279 	mutex_lock(&uctx->mm_list_lock);
280 	list_add(&mm->entry, &uctx->mm_head);
281 	mutex_unlock(&uctx->mm_list_lock);
282 
283 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
284 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
285 		 (unsigned long long)mm->key.phy_addr,
286 		 (unsigned long)mm->key.len, uctx);
287 
288 	return 0;
289 }
290 
291 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
292 			     unsigned long len)
293 {
294 	bool found = false;
295 	struct qedr_mm *mm;
296 
297 	mutex_lock(&uctx->mm_list_lock);
298 	list_for_each_entry(mm, &uctx->mm_head, entry) {
299 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
300 			continue;
301 
302 		found = true;
303 		break;
304 	}
305 	mutex_unlock(&uctx->mm_list_lock);
306 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
307 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
308 		 mm->key.phy_addr, mm->key.len, uctx, found);
309 
310 	return found;
311 }
312 
313 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
314 {
315 	struct ib_device *ibdev = uctx->device;
316 	int rc;
317 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
318 	struct qedr_alloc_ucontext_resp uresp = {};
319 	struct qedr_dev *dev = get_qedr_dev(ibdev);
320 	struct qed_rdma_add_user_out_params oparams;
321 
322 	if (!udata)
323 		return -EFAULT;
324 
325 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
326 	if (rc) {
327 		DP_ERR(dev,
328 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
329 		       rc);
330 		return rc;
331 	}
332 
333 	ctx->dpi = oparams.dpi;
334 	ctx->dpi_addr = oparams.dpi_addr;
335 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
336 	ctx->dpi_size = oparams.dpi_size;
337 	INIT_LIST_HEAD(&ctx->mm_head);
338 	mutex_init(&ctx->mm_list_lock);
339 
340 	uresp.dpm_enabled = dev->user_dpm_enabled;
341 	uresp.wids_enabled = 1;
342 	uresp.wid_count = oparams.wid_count;
343 	uresp.db_pa = ctx->dpi_phys_addr;
344 	uresp.db_size = ctx->dpi_size;
345 	uresp.max_send_wr = dev->attr.max_sqe;
346 	uresp.max_recv_wr = dev->attr.max_rqe;
347 	uresp.max_srq_wr = dev->attr.max_srq_wr;
348 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
349 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
350 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
351 	uresp.max_cqes = QEDR_MAX_CQES;
352 
353 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
354 	if (rc)
355 		return rc;
356 
357 	ctx->dev = dev;
358 
359 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
360 	if (rc)
361 		return rc;
362 
363 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
364 		 &ctx->ibucontext);
365 	return 0;
366 }
367 
368 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
369 {
370 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
371 	struct qedr_mm *mm, *tmp;
372 
373 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
374 		 uctx);
375 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
376 
377 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
378 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
379 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
380 			 mm->key.phy_addr, mm->key.len, uctx);
381 		list_del(&mm->entry);
382 		kfree(mm);
383 	}
384 }
385 
386 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
387 {
388 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
389 	struct qedr_dev *dev = get_qedr_dev(context->device);
390 	unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
391 	unsigned long len = (vma->vm_end - vma->vm_start);
392 	unsigned long dpi_start;
393 
394 	dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
395 
396 	DP_DEBUG(dev, QEDR_MSG_INIT,
397 		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
398 		 (void *)vma->vm_start, (void *)vma->vm_end,
399 		 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
400 
401 	if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
402 		DP_ERR(dev,
403 		       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
404 		       (void *)vma->vm_start, (void *)vma->vm_end);
405 		return -EINVAL;
406 	}
407 
408 	if (!qedr_search_mmap(ucontext, phys_addr, len)) {
409 		DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
410 		       vma->vm_pgoff);
411 		return -EINVAL;
412 	}
413 
414 	if (phys_addr < dpi_start ||
415 	    ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
416 		DP_ERR(dev,
417 		       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
418 		       (void *)phys_addr, (void *)dpi_start,
419 		       ucontext->dpi_size);
420 		return -EINVAL;
421 	}
422 
423 	if (vma->vm_flags & VM_READ) {
424 		DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
425 		return -EINVAL;
426 	}
427 
428 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
429 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
430 				  vma->vm_page_prot);
431 }
432 
433 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
434 {
435 	struct ib_device *ibdev = ibpd->device;
436 	struct qedr_dev *dev = get_qedr_dev(ibdev);
437 	struct qedr_pd *pd = get_qedr_pd(ibpd);
438 	u16 pd_id;
439 	int rc;
440 
441 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
442 		 udata ? "User Lib" : "Kernel");
443 
444 	if (!dev->rdma_ctx) {
445 		DP_ERR(dev, "invalid RDMA context\n");
446 		return -EINVAL;
447 	}
448 
449 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
450 	if (rc)
451 		return rc;
452 
453 	pd->pd_id = pd_id;
454 
455 	if (udata) {
456 		struct qedr_alloc_pd_uresp uresp = {
457 			.pd_id = pd_id,
458 		};
459 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
460 			udata, struct qedr_ucontext, ibucontext);
461 
462 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
463 		if (rc) {
464 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
465 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
466 			return rc;
467 		}
468 
469 		pd->uctx = context;
470 		pd->uctx->pd = pd;
471 	}
472 
473 	return 0;
474 }
475 
476 void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
477 {
478 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
479 	struct qedr_pd *pd = get_qedr_pd(ibpd);
480 
481 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
482 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
483 }
484 
485 static void qedr_free_pbl(struct qedr_dev *dev,
486 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
487 {
488 	struct pci_dev *pdev = dev->pdev;
489 	int i;
490 
491 	for (i = 0; i < pbl_info->num_pbls; i++) {
492 		if (!pbl[i].va)
493 			continue;
494 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
495 				  pbl[i].va, pbl[i].pa);
496 	}
497 
498 	kfree(pbl);
499 }
500 
/* Smallest and largest PBL page size, in bytes. */
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

/* Number of 64-bit page buffer entries (PBEs) that fit in a PBL page of
 * the given size. The argument is parenthesized so that an expression
 * such as NUM_PBES_ON_PAGE(a + b) divides the whole sum.
 */
#define NUM_PBES_ON_PAGE(_page_size) ((_page_size) / sizeof(u64))
/* PBE capacity of one maximum-size page, and of a two-layer table built
 * from maximum-size pages.
 */
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
507 
508 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
509 					   struct qedr_pbl_info *pbl_info,
510 					   gfp_t flags)
511 {
512 	struct pci_dev *pdev = dev->pdev;
513 	struct qedr_pbl *pbl_table;
514 	dma_addr_t *pbl_main_tbl;
515 	dma_addr_t pa;
516 	void *va;
517 	int i;
518 
519 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
520 	if (!pbl_table)
521 		return ERR_PTR(-ENOMEM);
522 
523 	for (i = 0; i < pbl_info->num_pbls; i++) {
524 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
525 					flags);
526 		if (!va)
527 			goto err;
528 
529 		pbl_table[i].va = va;
530 		pbl_table[i].pa = pa;
531 	}
532 
533 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
534 	 * the first one with physical pointers to all of the rest
535 	 */
536 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
537 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
538 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
539 
540 	return pbl_table;
541 
542 err:
543 	for (i--; i >= 0; i--)
544 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
545 				  pbl_table[i].va, pbl_table[i].pa);
546 
547 	qedr_free_pbl(dev, pbl_info, pbl_table);
548 
549 	return ERR_PTR(-ENOMEM);
550 }
551 
552 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
553 				struct qedr_pbl_info *pbl_info,
554 				u32 num_pbes, int two_layer_capable)
555 {
556 	u32 pbl_capacity;
557 	u32 pbl_size;
558 	u32 num_pbls;
559 
560 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
561 		if (num_pbes > MAX_PBES_TWO_LAYER) {
562 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
563 			       num_pbes);
564 			return -EINVAL;
565 		}
566 
567 		/* calculate required pbl page size */
568 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
569 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
570 			       NUM_PBES_ON_PAGE(pbl_size);
571 
572 		while (pbl_capacity < num_pbes) {
573 			pbl_size *= 2;
574 			pbl_capacity = pbl_size / sizeof(u64);
575 			pbl_capacity = pbl_capacity * pbl_capacity;
576 		}
577 
578 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
579 		num_pbls++;	/* One for the layer0 ( points to the pbls) */
580 		pbl_info->two_layered = true;
581 	} else {
582 		/* One layered PBL */
583 		num_pbls = 1;
584 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
585 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
586 		pbl_info->two_layered = false;
587 	}
588 
589 	pbl_info->num_pbls = num_pbls;
590 	pbl_info->pbl_size = pbl_size;
591 	pbl_info->num_pbes = num_pbes;
592 
593 	DP_DEBUG(dev, QEDR_MSG_MR,
594 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
595 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
596 
597 	return 0;
598 }
599 
600 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
601 			       struct qedr_pbl *pbl,
602 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
603 {
604 	int pbe_cnt, total_num_pbes = 0;
605 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
606 	struct qedr_pbl *pbl_tbl;
607 	struct sg_dma_page_iter sg_iter;
608 	struct regpair *pbe;
609 	u64 pg_addr;
610 
611 	if (!pbl_info->num_pbes)
612 		return;
613 
614 	/* If we have a two layered pbl, the first pbl points to the rest
615 	 * of the pbls and the first entry lays on the second pbl in the table
616 	 */
617 	if (pbl_info->two_layered)
618 		pbl_tbl = &pbl[1];
619 	else
620 		pbl_tbl = pbl;
621 
622 	pbe = (struct regpair *)pbl_tbl->va;
623 	if (!pbe) {
624 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
625 		return;
626 	}
627 
628 	pbe_cnt = 0;
629 
630 	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
631 
632 	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
633 		pg_addr = sg_page_iter_dma_address(&sg_iter);
634 		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
635 			pbe->lo = cpu_to_le32(pg_addr);
636 			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
637 
638 			pg_addr += BIT(pg_shift);
639 			pbe_cnt++;
640 			total_num_pbes++;
641 			pbe++;
642 
643 			if (total_num_pbes == pbl_info->num_pbes)
644 				return;
645 
646 			/* If the given pbl is full storing the pbes,
647 			 * move to next pbl.
648 			 */
649 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
650 				pbl_tbl++;
651 				pbe = (struct regpair *)pbl_tbl->va;
652 				pbe_cnt = 0;
653 			}
654 
655 			fw_pg_cnt++;
656 		}
657 	}
658 }
659 
660 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
661 			      struct qedr_cq *cq, struct ib_udata *udata)
662 {
663 	struct qedr_create_cq_uresp uresp;
664 	int rc;
665 
666 	memset(&uresp, 0, sizeof(uresp));
667 
668 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
669 	uresp.icid = cq->icid;
670 
671 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
672 	if (rc)
673 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
674 
675 	return rc;
676 }
677 
678 static void consume_cqe(struct qedr_cq *cq)
679 {
680 	if (cq->latest_cqe == cq->toggle_cqe)
681 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
682 
683 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
684 }
685 
686 static inline int qedr_align_cq_entries(int entries)
687 {
688 	u64 size, aligned_size;
689 
690 	/* We allocate an extra entry that we don't report to the FW. */
691 	size = (entries + 1) * QEDR_CQE_SIZE;
692 	aligned_size = ALIGN(size, PAGE_SIZE);
693 
694 	return aligned_size / QEDR_CQE_SIZE;
695 }
696 
697 static inline int qedr_init_user_queue(struct ib_udata *udata,
698 				       struct qedr_dev *dev,
699 				       struct qedr_userq *q, u64 buf_addr,
700 				       size_t buf_len, int access, int dmasync,
701 				       int alloc_and_init)
702 {
703 	u32 fw_pages;
704 	int rc;
705 
706 	q->buf_addr = buf_addr;
707 	q->buf_len = buf_len;
708 	q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync);
709 	if (IS_ERR(q->umem)) {
710 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
711 		       PTR_ERR(q->umem));
712 		return PTR_ERR(q->umem);
713 	}
714 
715 	fw_pages = ib_umem_page_count(q->umem) <<
716 	    (PAGE_SHIFT - FW_PAGE_SHIFT);
717 
718 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
719 	if (rc)
720 		goto err0;
721 
722 	if (alloc_and_init) {
723 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
724 		if (IS_ERR(q->pbl_tbl)) {
725 			rc = PTR_ERR(q->pbl_tbl);
726 			goto err0;
727 		}
728 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
729 				   FW_PAGE_SHIFT);
730 	} else {
731 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
732 		if (!q->pbl_tbl) {
733 			rc = -ENOMEM;
734 			goto err0;
735 		}
736 	}
737 
738 	return 0;
739 
740 err0:
741 	ib_umem_release(q->umem);
742 	q->umem = NULL;
743 
744 	return rc;
745 }
746 
747 static inline void qedr_init_cq_params(struct qedr_cq *cq,
748 				       struct qedr_ucontext *ctx,
749 				       struct qedr_dev *dev, int vector,
750 				       int chain_entries, int page_cnt,
751 				       u64 pbl_ptr,
752 				       struct qed_rdma_create_cq_in_params
753 				       *params)
754 {
755 	memset(params, 0, sizeof(*params));
756 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
757 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
758 	params->cnq_id = vector;
759 	params->cq_size = chain_entries - 1;
760 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
761 	params->pbl_num_pages = page_cnt;
762 	params->pbl_ptr = pbl_ptr;
763 	params->pbl_two_level = 0;
764 }
765 
766 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
767 {
768 	cq->db.data.agg_flags = flags;
769 	cq->db.data.value = cpu_to_le32(cons);
770 	writeq(cq->db.raw, cq->db_addr);
771 }
772 
773 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
774 {
775 	struct qedr_cq *cq = get_qedr_cq(ibcq);
776 	unsigned long sflags;
777 	struct qedr_dev *dev;
778 
779 	dev = get_qedr_dev(ibcq->device);
780 
781 	if (cq->destroyed) {
782 		DP_ERR(dev,
783 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
784 		       cq, cq->icid);
785 		return -EINVAL;
786 	}
787 
788 
789 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
790 		return 0;
791 
792 	spin_lock_irqsave(&cq->cq_lock, sflags);
793 
794 	cq->arm_flags = 0;
795 
796 	if (flags & IB_CQ_SOLICITED)
797 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
798 
799 	if (flags & IB_CQ_NEXT_COMP)
800 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
801 
802 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
803 
804 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
805 
806 	return 0;
807 }
808 
809 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
810 		   struct ib_udata *udata)
811 {
812 	struct ib_device *ibdev = ibcq->device;
813 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
814 		udata, struct qedr_ucontext, ibucontext);
815 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
816 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
817 	struct qedr_dev *dev = get_qedr_dev(ibdev);
818 	struct qed_rdma_create_cq_in_params params;
819 	struct qedr_create_cq_ureq ureq = {};
820 	int vector = attr->comp_vector;
821 	int entries = attr->cqe;
822 	struct qedr_cq *cq = get_qedr_cq(ibcq);
823 	int chain_entries;
824 	int page_cnt;
825 	u64 pbl_ptr;
826 	u16 icid;
827 	int rc;
828 
829 	DP_DEBUG(dev, QEDR_MSG_INIT,
830 		 "create_cq: called from %s. entries=%d, vector=%d\n",
831 		 udata ? "User Lib" : "Kernel", entries, vector);
832 
833 	if (entries > QEDR_MAX_CQES) {
834 		DP_ERR(dev,
835 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
836 		       entries, QEDR_MAX_CQES);
837 		return -EINVAL;
838 	}
839 
840 	chain_entries = qedr_align_cq_entries(entries);
841 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
842 
843 	if (udata) {
844 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
845 			DP_ERR(dev,
846 			       "create cq: problem copying data from user space\n");
847 			goto err0;
848 		}
849 
850 		if (!ureq.len) {
851 			DP_ERR(dev,
852 			       "create cq: cannot create a cq with 0 entries\n");
853 			goto err0;
854 		}
855 
856 		cq->cq_type = QEDR_CQ_TYPE_USER;
857 
858 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
859 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1,
860 					  1);
861 		if (rc)
862 			goto err0;
863 
864 		pbl_ptr = cq->q.pbl_tbl->pa;
865 		page_cnt = cq->q.pbl_info.num_pbes;
866 
867 		cq->ibcq.cqe = chain_entries;
868 	} else {
869 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
870 
871 		rc = dev->ops->common->chain_alloc(dev->cdev,
872 						   QED_CHAIN_USE_TO_CONSUME,
873 						   QED_CHAIN_MODE_PBL,
874 						   QED_CHAIN_CNT_TYPE_U32,
875 						   chain_entries,
876 						   sizeof(union rdma_cqe),
877 						   &cq->pbl, NULL);
878 		if (rc)
879 			goto err1;
880 
881 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
882 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
883 		cq->ibcq.cqe = cq->pbl.capacity;
884 	}
885 
886 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
887 			    pbl_ptr, &params);
888 
889 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
890 	if (rc)
891 		goto err2;
892 
893 	cq->icid = icid;
894 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
895 	spin_lock_init(&cq->cq_lock);
896 
897 	if (udata) {
898 		rc = qedr_copy_cq_uresp(dev, cq, udata);
899 		if (rc)
900 			goto err3;
901 	} else {
902 		/* Generate doorbell address. */
903 		cq->db_addr = dev->db_addr +
904 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
905 		cq->db.data.icid = cq->icid;
906 		cq->db.data.params = DB_AGG_CMD_SET <<
907 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
908 
909 		/* point to the very last element, passing it we will toggle */
910 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
911 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
912 		cq->latest_cqe = NULL;
913 		consume_cqe(cq);
914 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
915 	}
916 
917 	DP_DEBUG(dev, QEDR_MSG_CQ,
918 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
919 		 cq->icid, cq, params.cq_size);
920 
921 	return 0;
922 
923 err3:
924 	destroy_iparams.icid = cq->icid;
925 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
926 				  &destroy_oparams);
927 err2:
928 	if (udata)
929 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
930 	else
931 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
932 err1:
933 	if (udata)
934 		ib_umem_release(cq->q.umem);
935 err0:
936 	return -EINVAL;
937 }
938 
939 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
940 {
941 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
942 	struct qedr_cq *cq = get_qedr_cq(ibcq);
943 
944 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
945 
946 	return 0;
947 }
948 
/* Wait parameters for qedr_destroy_cq(): the maximum number of polls in
 * each wait phase, and the delay per poll (passed to udelay() in the
 * first phase and to msleep() in the second).
 */
#define QEDR_DESTROY_CQ_MAX_ITERATIONS		10
#define QEDR_DESTROY_CQ_ITER_DURATION		10
951 
952 void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
953 {
954 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
955 	struct qed_rdma_destroy_cq_out_params oparams;
956 	struct qed_rdma_destroy_cq_in_params iparams;
957 	struct qedr_cq *cq = get_qedr_cq(ibcq);
958 	int iter;
959 
960 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
961 
962 	cq->destroyed = 1;
963 
964 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
965 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
966 		return;
967 
968 	iparams.icid = cq->icid;
969 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
970 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
971 
972 	if (udata) {
973 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
974 		ib_umem_release(cq->q.umem);
975 	}
976 
977 	/* We don't want the IRQ handler to handle a non-existing CQ so we
978 	 * wait until all CNQ interrupts, if any, are received. This will always
979 	 * happen and will always happen very fast. If not, then a serious error
980 	 * has occured. That is why we can use a long delay.
981 	 * We spin for a short time so we don’t lose time on context switching
982 	 * in case all the completions are handled in that span. Otherwise
983 	 * we sleep for a while and check again. Since the CNQ may be
984 	 * associated with (only) the current CPU we use msleep to allow the
985 	 * current CPU to be freed.
986 	 * The CNQ notification is increased in qedr_irq_handler().
987 	 */
988 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
989 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
990 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
991 		iter--;
992 	}
993 
994 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
995 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
996 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
997 		iter--;
998 	}
999 
1000 	/* Note that we don't need to have explicit code to wait for the
1001 	 * completion of the event handler because it is invoked from the EQ.
1002 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1003 	 * be certain that there's no event handler in process.
1004 	 */
1005 }
1006 
1007 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1008 					  struct ib_qp_attr *attr,
1009 					  int attr_mask,
1010 					  struct qed_rdma_modify_qp_in_params
1011 					  *qp_params)
1012 {
1013 	const struct ib_gid_attr *gid_attr;
1014 	enum rdma_network_type nw_type;
1015 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1016 	u32 ipv4_addr;
1017 	int ret;
1018 	int i;
1019 
1020 	gid_attr = grh->sgid_attr;
1021 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1022 	if (ret)
1023 		return ret;
1024 
1025 	nw_type = rdma_gid_attr_network_type(gid_attr);
1026 	switch (nw_type) {
1027 	case RDMA_NETWORK_IPV6:
1028 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1029 		       sizeof(qp_params->sgid));
1030 		memcpy(&qp_params->dgid.bytes[0],
1031 		       &grh->dgid,
1032 		       sizeof(qp_params->dgid));
1033 		qp_params->roce_mode = ROCE_V2_IPV6;
1034 		SET_FIELD(qp_params->modify_flags,
1035 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1036 		break;
1037 	case RDMA_NETWORK_IB:
1038 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1039 		       sizeof(qp_params->sgid));
1040 		memcpy(&qp_params->dgid.bytes[0],
1041 		       &grh->dgid,
1042 		       sizeof(qp_params->dgid));
1043 		qp_params->roce_mode = ROCE_V1;
1044 		break;
1045 	case RDMA_NETWORK_IPV4:
1046 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1047 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1048 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1049 		qp_params->sgid.ipv4_addr = ipv4_addr;
1050 		ipv4_addr =
1051 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1052 		qp_params->dgid.ipv4_addr = ipv4_addr;
1053 		SET_FIELD(qp_params->modify_flags,
1054 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1055 		qp_params->roce_mode = ROCE_V2_IPV4;
1056 		break;
1057 	}
1058 
1059 	for (i = 0; i < 4; i++) {
1060 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1061 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1062 	}
1063 
1064 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1065 		qp_params->vlan_id = 0;
1066 
1067 	return 0;
1068 }
1069 
1070 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1071 			       struct ib_qp_init_attr *attrs,
1072 			       struct ib_udata *udata)
1073 {
1074 	struct qedr_device_attr *qattr = &dev->attr;
1075 
1076 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1077 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1078 		DP_DEBUG(dev, QEDR_MSG_QP,
1079 			 "create qp: unsupported qp type=0x%x requested\n",
1080 			 attrs->qp_type);
1081 		return -EINVAL;
1082 	}
1083 
1084 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1085 		DP_ERR(dev,
1086 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1087 		       attrs->cap.max_send_wr, qattr->max_sqe);
1088 		return -EINVAL;
1089 	}
1090 
1091 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1092 		DP_ERR(dev,
1093 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1094 		       attrs->cap.max_inline_data, qattr->max_inline);
1095 		return -EINVAL;
1096 	}
1097 
1098 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1099 		DP_ERR(dev,
1100 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1101 		       attrs->cap.max_send_sge, qattr->max_sge);
1102 		return -EINVAL;
1103 	}
1104 
1105 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1106 		DP_ERR(dev,
1107 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1108 		       attrs->cap.max_recv_sge, qattr->max_sge);
1109 		return -EINVAL;
1110 	}
1111 
1112 	/* Unprivileged user space cannot create special QP */
1113 	if (udata && attrs->qp_type == IB_QPT_GSI) {
1114 		DP_ERR(dev,
1115 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1116 		       attrs->qp_type);
1117 		return -EINVAL;
1118 	}
1119 
1120 	return 0;
1121 }
1122 
1123 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1124 			       struct qedr_srq *srq, struct ib_udata *udata)
1125 {
1126 	struct qedr_create_srq_uresp uresp = {};
1127 	int rc;
1128 
1129 	uresp.srq_id = srq->srq_id;
1130 
1131 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1132 	if (rc)
1133 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1134 
1135 	return rc;
1136 }
1137 
1138 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1139 			       struct qedr_create_qp_uresp *uresp,
1140 			       struct qedr_qp *qp)
1141 {
1142 	/* iWARP requires two doorbells per RQ. */
1143 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1144 		uresp->rq_db_offset =
1145 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1146 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1147 	} else {
1148 		uresp->rq_db_offset =
1149 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1150 	}
1151 
1152 	uresp->rq_icid = qp->icid;
1153 }
1154 
1155 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1156 			       struct qedr_create_qp_uresp *uresp,
1157 			       struct qedr_qp *qp)
1158 {
1159 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1160 
1161 	/* iWARP uses the same cid for rq and sq */
1162 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1163 		uresp->sq_icid = qp->icid;
1164 	else
1165 		uresp->sq_icid = qp->icid + 1;
1166 }
1167 
1168 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1169 			      struct qedr_qp *qp, struct ib_udata *udata)
1170 {
1171 	struct qedr_create_qp_uresp uresp;
1172 	int rc;
1173 
1174 	memset(&uresp, 0, sizeof(uresp));
1175 	qedr_copy_sq_uresp(dev, &uresp, qp);
1176 	qedr_copy_rq_uresp(dev, &uresp, qp);
1177 
1178 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1179 	uresp.qp_id = qp->qp_id;
1180 
1181 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1182 	if (rc)
1183 		DP_ERR(dev,
1184 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1185 		       qp->icid);
1186 
1187 	return rc;
1188 }
1189 
1190 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1191 				      struct qedr_qp *qp,
1192 				      struct qedr_pd *pd,
1193 				      struct ib_qp_init_attr *attrs)
1194 {
1195 	spin_lock_init(&qp->q_lock);
1196 	atomic_set(&qp->refcnt, 1);
1197 	qp->pd = pd;
1198 	qp->qp_type = attrs->qp_type;
1199 	qp->max_inline_data = attrs->cap.max_inline_data;
1200 	qp->sq.max_sges = attrs->cap.max_send_sge;
1201 	qp->state = QED_ROCE_QP_STATE_RESET;
1202 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1203 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1204 	qp->dev = dev;
1205 
1206 	if (attrs->srq) {
1207 		qp->srq = get_qedr_srq(attrs->srq);
1208 	} else {
1209 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1210 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1211 		DP_DEBUG(dev, QEDR_MSG_QP,
1212 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1213 			 qp->rq.max_sges, qp->rq_cq->icid);
1214 	}
1215 
1216 	DP_DEBUG(dev, QEDR_MSG_QP,
1217 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1218 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1219 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1220 	DP_DEBUG(dev, QEDR_MSG_QP,
1221 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1222 		 qp->sq.max_sges, qp->sq_cq->icid);
1223 }
1224 
1225 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1226 {
1227 	qp->sq.db = dev->db_addr +
1228 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1229 	qp->sq.db_data.data.icid = qp->icid + 1;
1230 	if (!qp->srq) {
1231 		qp->rq.db = dev->db_addr +
1232 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1233 		qp->rq.db_data.data.icid = qp->icid;
1234 	}
1235 }
1236 
1237 static int qedr_check_srq_params(struct qedr_dev *dev,
1238 				 struct ib_srq_init_attr *attrs,
1239 				 struct ib_udata *udata)
1240 {
1241 	struct qedr_device_attr *qattr = &dev->attr;
1242 
1243 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1244 		DP_ERR(dev,
1245 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1246 		       attrs->attr.max_wr, qattr->max_srq_wr);
1247 		return -EINVAL;
1248 	}
1249 
1250 	if (attrs->attr.max_sge > qattr->max_sge) {
1251 		DP_ERR(dev,
1252 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1253 		       attrs->attr.max_sge, qattr->max_sge);
1254 		return -EINVAL;
1255 	}
1256 
1257 	return 0;
1258 }
1259 
1260 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1261 {
1262 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1263 	ib_umem_release(srq->usrq.umem);
1264 	ib_umem_release(srq->prod_umem);
1265 }
1266 
1267 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1268 {
1269 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1270 	struct qedr_dev *dev = srq->dev;
1271 
1272 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1273 
1274 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1275 			  hw_srq->virt_prod_pair_addr,
1276 			  hw_srq->phy_prod_pair_addr);
1277 }
1278 
1279 static int qedr_init_srq_user_params(struct ib_udata *udata,
1280 				     struct qedr_srq *srq,
1281 				     struct qedr_create_srq_ureq *ureq,
1282 				     int access, int dmasync)
1283 {
1284 	struct scatterlist *sg;
1285 	int rc;
1286 
1287 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1288 				  ureq->srq_len, access, dmasync, 1);
1289 	if (rc)
1290 		return rc;
1291 
1292 	srq->prod_umem =
1293 		ib_umem_get(udata, ureq->prod_pair_addr,
1294 			    sizeof(struct rdma_srq_producers), access, dmasync);
1295 	if (IS_ERR(srq->prod_umem)) {
1296 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1297 		ib_umem_release(srq->usrq.umem);
1298 		DP_ERR(srq->dev,
1299 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1300 		       PTR_ERR(srq->prod_umem));
1301 		return PTR_ERR(srq->prod_umem);
1302 	}
1303 
1304 	sg = srq->prod_umem->sg_head.sgl;
1305 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1306 
1307 	return 0;
1308 }
1309 
1310 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1311 					struct qedr_dev *dev,
1312 					struct ib_srq_init_attr *init_attr)
1313 {
1314 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1315 	dma_addr_t phy_prod_pair_addr;
1316 	u32 num_elems;
1317 	void *va;
1318 	int rc;
1319 
1320 	va = dma_alloc_coherent(&dev->pdev->dev,
1321 				sizeof(struct rdma_srq_producers),
1322 				&phy_prod_pair_addr, GFP_KERNEL);
1323 	if (!va) {
1324 		DP_ERR(dev,
1325 		       "create srq: failed to allocate dma memory for producer\n");
1326 		return -ENOMEM;
1327 	}
1328 
1329 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1330 	hw_srq->virt_prod_pair_addr = va;
1331 
1332 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1333 	rc = dev->ops->common->chain_alloc(dev->cdev,
1334 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1335 					   QED_CHAIN_MODE_PBL,
1336 					   QED_CHAIN_CNT_TYPE_U32,
1337 					   num_elems,
1338 					   QEDR_SRQ_WQE_ELEM_SIZE,
1339 					   &hw_srq->pbl, NULL);
1340 	if (rc)
1341 		goto err0;
1342 
1343 	hw_srq->num_elems = num_elems;
1344 
1345 	return 0;
1346 
1347 err0:
1348 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1349 			  va, phy_prod_pair_addr);
1350 	return rc;
1351 }
1352 
1353 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1354 		    struct ib_udata *udata)
1355 {
1356 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1357 	struct qed_rdma_create_srq_in_params in_params = {};
1358 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1359 	struct qed_rdma_create_srq_out_params out_params;
1360 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1361 	struct qedr_create_srq_ureq ureq = {};
1362 	u64 pbl_base_addr, phy_prod_pair_addr;
1363 	struct qedr_srq_hwq_info *hw_srq;
1364 	u32 page_cnt, page_size;
1365 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1366 	int rc = 0;
1367 
1368 	DP_DEBUG(dev, QEDR_MSG_QP,
1369 		 "create SRQ called from %s (pd %p)\n",
1370 		 (udata) ? "User lib" : "kernel", pd);
1371 
1372 	rc = qedr_check_srq_params(dev, init_attr, udata);
1373 	if (rc)
1374 		return -EINVAL;
1375 
1376 	srq->dev = dev;
1377 	hw_srq = &srq->hw_srq;
1378 	spin_lock_init(&srq->lock);
1379 
1380 	hw_srq->max_wr = init_attr->attr.max_wr;
1381 	hw_srq->max_sges = init_attr->attr.max_sge;
1382 
1383 	if (udata) {
1384 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
1385 			DP_ERR(dev,
1386 			       "create srq: problem copying data from user space\n");
1387 			goto err0;
1388 		}
1389 
1390 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0);
1391 		if (rc)
1392 			goto err0;
1393 
1394 		page_cnt = srq->usrq.pbl_info.num_pbes;
1395 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1396 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1397 		page_size = PAGE_SIZE;
1398 	} else {
1399 		struct qed_chain *pbl;
1400 
1401 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1402 		if (rc)
1403 			goto err0;
1404 
1405 		pbl = &hw_srq->pbl;
1406 		page_cnt = qed_chain_get_page_cnt(pbl);
1407 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1408 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1409 		page_size = QED_CHAIN_PAGE_SIZE;
1410 	}
1411 
1412 	in_params.pd_id = pd->pd_id;
1413 	in_params.pbl_base_addr = pbl_base_addr;
1414 	in_params.prod_pair_addr = phy_prod_pair_addr;
1415 	in_params.num_pages = page_cnt;
1416 	in_params.page_size = page_size;
1417 
1418 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1419 	if (rc)
1420 		goto err1;
1421 
1422 	srq->srq_id = out_params.srq_id;
1423 
1424 	if (udata) {
1425 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1426 		if (rc)
1427 			goto err2;
1428 	}
1429 
1430 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1431 	if (rc)
1432 		goto err2;
1433 
1434 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1435 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1436 	return 0;
1437 
1438 err2:
1439 	destroy_in_params.srq_id = srq->srq_id;
1440 
1441 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1442 err1:
1443 	if (udata)
1444 		qedr_free_srq_user_params(srq);
1445 	else
1446 		qedr_free_srq_kernel_params(srq);
1447 err0:
1448 	return -EFAULT;
1449 }
1450 
1451 void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1452 {
1453 	struct qed_rdma_destroy_srq_in_params in_params = {};
1454 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1455 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1456 
1457 	xa_erase_irq(&dev->srqs, srq->srq_id);
1458 	in_params.srq_id = srq->srq_id;
1459 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1460 
1461 	if (ibsrq->uobject)
1462 		qedr_free_srq_user_params(srq);
1463 	else
1464 		qedr_free_srq_kernel_params(srq);
1465 
1466 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1467 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1468 		 srq->srq_id);
1469 }
1470 
1471 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1472 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1473 {
1474 	struct qed_rdma_modify_srq_in_params in_params = {};
1475 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1476 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1477 	int rc;
1478 
1479 	if (attr_mask & IB_SRQ_MAX_WR) {
1480 		DP_ERR(dev,
1481 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1482 		       attr_mask, srq);
1483 		return -EINVAL;
1484 	}
1485 
1486 	if (attr_mask & IB_SRQ_LIMIT) {
1487 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1488 			DP_ERR(dev,
1489 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1490 			       attr->srq_limit, srq->hw_srq.max_wr);
1491 			return -EINVAL;
1492 		}
1493 
1494 		in_params.srq_id = srq->srq_id;
1495 		in_params.wqe_limit = attr->srq_limit;
1496 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1497 		if (rc)
1498 			return rc;
1499 	}
1500 
1501 	srq->srq_limit = attr->srq_limit;
1502 
1503 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1504 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1505 
1506 	return 0;
1507 }
1508 
1509 static inline void
1510 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1511 			      struct qedr_pd *pd,
1512 			      struct qedr_qp *qp,
1513 			      struct ib_qp_init_attr *attrs,
1514 			      bool fmr_and_reserved_lkey,
1515 			      struct qed_rdma_create_qp_in_params *params)
1516 {
1517 	/* QP handle to be written in an async event */
1518 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1519 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1520 
1521 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1522 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1523 	params->pd = pd->pd_id;
1524 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1525 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1526 	params->stats_queue = 0;
1527 	params->srq_id = 0;
1528 	params->use_srq = false;
1529 
1530 	if (!qp->srq) {
1531 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1532 
1533 	} else {
1534 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1535 		params->srq_id = qp->srq->srq_id;
1536 		params->use_srq = true;
1537 	}
1538 }
1539 
1540 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1541 {
1542 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1543 		 "qp=%p. "
1544 		 "sq_addr=0x%llx, "
1545 		 "sq_len=%zd, "
1546 		 "rq_addr=0x%llx, "
1547 		 "rq_len=%zd"
1548 		 "\n",
1549 		 qp,
1550 		 qp->usq.buf_addr,
1551 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1552 }
1553 
1554 static inline void
1555 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1556 			    struct qedr_qp *qp,
1557 			    struct qed_rdma_create_qp_out_params *out_params)
1558 {
1559 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1560 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1561 
1562 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1563 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1564 	if (!qp->srq) {
1565 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1566 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1567 	}
1568 
1569 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1570 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1571 }
1572 
1573 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1574 {
1575 	ib_umem_release(qp->usq.umem);
1576 	qp->usq.umem = NULL;
1577 
1578 	ib_umem_release(qp->urq.umem);
1579 	qp->urq.umem = NULL;
1580 }
1581 
1582 static int qedr_create_user_qp(struct qedr_dev *dev,
1583 			       struct qedr_qp *qp,
1584 			       struct ib_pd *ibpd,
1585 			       struct ib_udata *udata,
1586 			       struct ib_qp_init_attr *attrs)
1587 {
1588 	struct qed_rdma_create_qp_in_params in_params;
1589 	struct qed_rdma_create_qp_out_params out_params;
1590 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1591 	struct qedr_create_qp_ureq ureq;
1592 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1593 	int rc = -EINVAL;
1594 
1595 	memset(&ureq, 0, sizeof(ureq));
1596 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1597 	if (rc) {
1598 		DP_ERR(dev, "Problem copying data from user space\n");
1599 		return rc;
1600 	}
1601 
1602 	/* SQ - read access only (0), dma sync not required (0) */
1603 	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1604 				  ureq.sq_len, 0, 0, alloc_and_init);
1605 	if (rc)
1606 		return rc;
1607 
1608 	if (!qp->srq) {
1609 		/* RQ - read access only (0), dma sync not required (0) */
1610 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1611 					  ureq.rq_len, 0, 0, alloc_and_init);
1612 		if (rc)
1613 			return rc;
1614 	}
1615 
1616 	memset(&in_params, 0, sizeof(in_params));
1617 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1618 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1619 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1620 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1621 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1622 	if (!qp->srq) {
1623 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1624 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1625 	}
1626 
1627 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1628 					      &in_params, &out_params);
1629 
1630 	if (!qp->qed_qp) {
1631 		rc = -ENOMEM;
1632 		goto err1;
1633 	}
1634 
1635 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1636 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1637 
1638 	qp->qp_id = out_params.qp_id;
1639 	qp->icid = out_params.icid;
1640 
1641 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1642 	if (rc)
1643 		goto err;
1644 
1645 	qedr_qp_user_print(dev, qp);
1646 
1647 	return 0;
1648 err:
1649 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1650 	if (rc)
1651 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1652 
1653 err1:
1654 	qedr_cleanup_user(dev, qp);
1655 	return rc;
1656 }
1657 
1658 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1659 {
1660 	qp->sq.db = dev->db_addr +
1661 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1662 	qp->sq.db_data.data.icid = qp->icid;
1663 
1664 	qp->rq.db = dev->db_addr +
1665 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1666 	qp->rq.db_data.data.icid = qp->icid;
1667 	qp->rq.iwarp_db2 = dev->db_addr +
1668 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1669 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1670 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1671 }
1672 
1673 static int
1674 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1675 			   struct qedr_qp *qp,
1676 			   struct qed_rdma_create_qp_in_params *in_params,
1677 			   u32 n_sq_elems, u32 n_rq_elems)
1678 {
1679 	struct qed_rdma_create_qp_out_params out_params;
1680 	int rc;
1681 
1682 	rc = dev->ops->common->chain_alloc(dev->cdev,
1683 					   QED_CHAIN_USE_TO_PRODUCE,
1684 					   QED_CHAIN_MODE_PBL,
1685 					   QED_CHAIN_CNT_TYPE_U32,
1686 					   n_sq_elems,
1687 					   QEDR_SQE_ELEMENT_SIZE,
1688 					   &qp->sq.pbl, NULL);
1689 
1690 	if (rc)
1691 		return rc;
1692 
1693 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1694 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1695 
1696 	rc = dev->ops->common->chain_alloc(dev->cdev,
1697 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1698 					   QED_CHAIN_MODE_PBL,
1699 					   QED_CHAIN_CNT_TYPE_U32,
1700 					   n_rq_elems,
1701 					   QEDR_RQE_ELEMENT_SIZE,
1702 					   &qp->rq.pbl, NULL);
1703 	if (rc)
1704 		return rc;
1705 
1706 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1707 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1708 
1709 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1710 					      in_params, &out_params);
1711 
1712 	if (!qp->qed_qp)
1713 		return -EINVAL;
1714 
1715 	qp->qp_id = out_params.qp_id;
1716 	qp->icid = out_params.icid;
1717 
1718 	qedr_set_roce_db_info(dev, qp);
1719 	return rc;
1720 }
1721 
1722 static int
1723 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1724 			    struct qedr_qp *qp,
1725 			    struct qed_rdma_create_qp_in_params *in_params,
1726 			    u32 n_sq_elems, u32 n_rq_elems)
1727 {
1728 	struct qed_rdma_create_qp_out_params out_params;
1729 	struct qed_chain_ext_pbl ext_pbl;
1730 	int rc;
1731 
1732 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1733 						     QEDR_SQE_ELEMENT_SIZE,
1734 						     QED_CHAIN_MODE_PBL);
1735 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1736 						     QEDR_RQE_ELEMENT_SIZE,
1737 						     QED_CHAIN_MODE_PBL);
1738 
1739 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1740 					      in_params, &out_params);
1741 
1742 	if (!qp->qed_qp)
1743 		return -EINVAL;
1744 
1745 	/* Now we allocate the chain */
1746 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1747 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1748 
1749 	rc = dev->ops->common->chain_alloc(dev->cdev,
1750 					   QED_CHAIN_USE_TO_PRODUCE,
1751 					   QED_CHAIN_MODE_PBL,
1752 					   QED_CHAIN_CNT_TYPE_U32,
1753 					   n_sq_elems,
1754 					   QEDR_SQE_ELEMENT_SIZE,
1755 					   &qp->sq.pbl, &ext_pbl);
1756 
1757 	if (rc)
1758 		goto err;
1759 
1760 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1761 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1762 
1763 	rc = dev->ops->common->chain_alloc(dev->cdev,
1764 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1765 					   QED_CHAIN_MODE_PBL,
1766 					   QED_CHAIN_CNT_TYPE_U32,
1767 					   n_rq_elems,
1768 					   QEDR_RQE_ELEMENT_SIZE,
1769 					   &qp->rq.pbl, &ext_pbl);
1770 
1771 	if (rc)
1772 		goto err;
1773 
1774 	qp->qp_id = out_params.qp_id;
1775 	qp->icid = out_params.icid;
1776 
1777 	qedr_set_iwarp_db_info(dev, qp);
1778 	return rc;
1779 
1780 err:
1781 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1782 
1783 	return rc;
1784 }
1785 
1786 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1787 {
1788 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1789 	kfree(qp->wqe_wr_id);
1790 
1791 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1792 	kfree(qp->rqe_wr_id);
1793 }
1794 
1795 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1796 				 struct qedr_qp *qp,
1797 				 struct ib_pd *ibpd,
1798 				 struct ib_qp_init_attr *attrs)
1799 {
1800 	struct qed_rdma_create_qp_in_params in_params;
1801 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1802 	int rc = -EINVAL;
1803 	u32 n_rq_elems;
1804 	u32 n_sq_elems;
1805 	u32 n_sq_entries;
1806 
1807 	memset(&in_params, 0, sizeof(in_params));
1808 
1809 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1810 	 * the ring. The ring should allow at least a single WR, even if the
1811 	 * user requested none, due to allocation issues.
1812 	 * We should add an extra WR since the prod and cons indices of
1813 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1814 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1815 	 * double the number of entries due an iSER issue that pushes far more
1816 	 * WRs than indicated. If we decline its ib_post_send() then we get
1817 	 * error prints in the dmesg we'd like to avoid.
1818 	 */
1819 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1820 			      dev->attr.max_sqe);
1821 
1822 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1823 				GFP_KERNEL);
1824 	if (!qp->wqe_wr_id) {
1825 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1826 		return -ENOMEM;
1827 	}
1828 
1829 	/* QP handle to be written in CQE */
1830 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1831 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1832 
1833 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1834 	 * the ring. There ring should allow at least a single WR, even if the
1835 	 * user requested none, due to allocation issues.
1836 	 */
1837 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1838 
1839 	/* Allocate driver internal RQ array */
1840 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1841 				GFP_KERNEL);
1842 	if (!qp->rqe_wr_id) {
1843 		DP_ERR(dev,
1844 		       "create qp: failed RQ shadow memory allocation\n");
1845 		kfree(qp->wqe_wr_id);
1846 		return -ENOMEM;
1847 	}
1848 
1849 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1850 
1851 	n_sq_entries = attrs->cap.max_send_wr;
1852 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1853 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1854 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1855 
1856 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1857 
1858 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1859 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1860 						 n_sq_elems, n_rq_elems);
1861 	else
1862 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1863 						n_sq_elems, n_rq_elems);
1864 	if (rc)
1865 		qedr_cleanup_kernel(dev, qp);
1866 
1867 	return rc;
1868 }
1869 
1870 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1871 			     struct ib_qp_init_attr *attrs,
1872 			     struct ib_udata *udata)
1873 {
1874 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1875 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1876 	struct qedr_qp *qp;
1877 	struct ib_qp *ibqp;
1878 	int rc = 0;
1879 
1880 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1881 		 udata ? "user library" : "kernel", pd);
1882 
1883 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
1884 	if (rc)
1885 		return ERR_PTR(rc);
1886 
1887 	DP_DEBUG(dev, QEDR_MSG_QP,
1888 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1889 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1890 		 get_qedr_cq(attrs->send_cq),
1891 		 get_qedr_cq(attrs->send_cq)->icid,
1892 		 get_qedr_cq(attrs->recv_cq),
1893 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
1894 
1895 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1896 	if (!qp) {
1897 		DP_ERR(dev, "create qp: failed allocating memory\n");
1898 		return ERR_PTR(-ENOMEM);
1899 	}
1900 
1901 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1902 
1903 	if (attrs->qp_type == IB_QPT_GSI) {
1904 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1905 		if (IS_ERR(ibqp))
1906 			kfree(qp);
1907 		return ibqp;
1908 	}
1909 
1910 	if (udata)
1911 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1912 	else
1913 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1914 
1915 	if (rc)
1916 		goto err;
1917 
1918 	qp->ibqp.qp_num = qp->qp_id;
1919 
1920 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1921 		rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
1922 		if (rc)
1923 			goto err;
1924 	}
1925 
1926 	return &qp->ibqp;
1927 
1928 err:
1929 	kfree(qp);
1930 
1931 	return ERR_PTR(-EFAULT);
1932 }
1933 
1934 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1935 {
1936 	switch (qp_state) {
1937 	case QED_ROCE_QP_STATE_RESET:
1938 		return IB_QPS_RESET;
1939 	case QED_ROCE_QP_STATE_INIT:
1940 		return IB_QPS_INIT;
1941 	case QED_ROCE_QP_STATE_RTR:
1942 		return IB_QPS_RTR;
1943 	case QED_ROCE_QP_STATE_RTS:
1944 		return IB_QPS_RTS;
1945 	case QED_ROCE_QP_STATE_SQD:
1946 		return IB_QPS_SQD;
1947 	case QED_ROCE_QP_STATE_ERR:
1948 		return IB_QPS_ERR;
1949 	case QED_ROCE_QP_STATE_SQE:
1950 		return IB_QPS_SQE;
1951 	}
1952 	return IB_QPS_ERR;
1953 }
1954 
1955 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1956 					enum ib_qp_state qp_state)
1957 {
1958 	switch (qp_state) {
1959 	case IB_QPS_RESET:
1960 		return QED_ROCE_QP_STATE_RESET;
1961 	case IB_QPS_INIT:
1962 		return QED_ROCE_QP_STATE_INIT;
1963 	case IB_QPS_RTR:
1964 		return QED_ROCE_QP_STATE_RTR;
1965 	case IB_QPS_RTS:
1966 		return QED_ROCE_QP_STATE_RTS;
1967 	case IB_QPS_SQD:
1968 		return QED_ROCE_QP_STATE_SQD;
1969 	case IB_QPS_ERR:
1970 		return QED_ROCE_QP_STATE_ERR;
1971 	default:
1972 		return QED_ROCE_QP_STATE_ERR;
1973 	}
1974 }
1975 
1976 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1977 {
1978 	qed_chain_reset(&qph->pbl);
1979 	qph->prod = 0;
1980 	qph->cons = 0;
1981 	qph->wqe_cons = 0;
1982 	qph->db_data.data.value = cpu_to_le16(0);
1983 }
1984 
1985 static int qedr_update_qp_state(struct qedr_dev *dev,
1986 				struct qedr_qp *qp,
1987 				enum qed_roce_qp_state cur_state,
1988 				enum qed_roce_qp_state new_state)
1989 {
1990 	int status = 0;
1991 
1992 	if (new_state == cur_state)
1993 		return 0;
1994 
1995 	switch (cur_state) {
1996 	case QED_ROCE_QP_STATE_RESET:
1997 		switch (new_state) {
1998 		case QED_ROCE_QP_STATE_INIT:
1999 			qp->prev_wqe_size = 0;
2000 			qedr_reset_qp_hwq_info(&qp->sq);
2001 			qedr_reset_qp_hwq_info(&qp->rq);
2002 			break;
2003 		default:
2004 			status = -EINVAL;
2005 			break;
2006 		}
2007 		break;
2008 	case QED_ROCE_QP_STATE_INIT:
2009 		switch (new_state) {
2010 		case QED_ROCE_QP_STATE_RTR:
2011 			/* Update doorbell (in case post_recv was
2012 			 * done before move to RTR)
2013 			 */
2014 
2015 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2016 				writel(qp->rq.db_data.raw, qp->rq.db);
2017 			}
2018 			break;
2019 		case QED_ROCE_QP_STATE_ERR:
2020 			break;
2021 		default:
2022 			/* Invalid state change. */
2023 			status = -EINVAL;
2024 			break;
2025 		}
2026 		break;
2027 	case QED_ROCE_QP_STATE_RTR:
2028 		/* RTR->XXX */
2029 		switch (new_state) {
2030 		case QED_ROCE_QP_STATE_RTS:
2031 			break;
2032 		case QED_ROCE_QP_STATE_ERR:
2033 			break;
2034 		default:
2035 			/* Invalid state change. */
2036 			status = -EINVAL;
2037 			break;
2038 		}
2039 		break;
2040 	case QED_ROCE_QP_STATE_RTS:
2041 		/* RTS->XXX */
2042 		switch (new_state) {
2043 		case QED_ROCE_QP_STATE_SQD:
2044 			break;
2045 		case QED_ROCE_QP_STATE_ERR:
2046 			break;
2047 		default:
2048 			/* Invalid state change. */
2049 			status = -EINVAL;
2050 			break;
2051 		}
2052 		break;
2053 	case QED_ROCE_QP_STATE_SQD:
2054 		/* SQD->XXX */
2055 		switch (new_state) {
2056 		case QED_ROCE_QP_STATE_RTS:
2057 		case QED_ROCE_QP_STATE_ERR:
2058 			break;
2059 		default:
2060 			/* Invalid state change. */
2061 			status = -EINVAL;
2062 			break;
2063 		}
2064 		break;
2065 	case QED_ROCE_QP_STATE_ERR:
2066 		/* ERR->XXX */
2067 		switch (new_state) {
2068 		case QED_ROCE_QP_STATE_RESET:
2069 			if ((qp->rq.prod != qp->rq.cons) ||
2070 			    (qp->sq.prod != qp->sq.cons)) {
2071 				DP_NOTICE(dev,
2072 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2073 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2074 					  qp->sq.cons);
2075 				status = -EINVAL;
2076 			}
2077 			break;
2078 		default:
2079 			status = -EINVAL;
2080 			break;
2081 		}
2082 		break;
2083 	default:
2084 		status = -EINVAL;
2085 		break;
2086 	}
2087 
2088 	return status;
2089 }
2090 
2091 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2092 		   int attr_mask, struct ib_udata *udata)
2093 {
2094 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2095 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2096 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2097 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2098 	enum ib_qp_state old_qp_state, new_qp_state;
2099 	enum qed_roce_qp_state cur_state;
2100 	int rc = 0;
2101 
2102 	DP_DEBUG(dev, QEDR_MSG_QP,
2103 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2104 		 attr->qp_state);
2105 
2106 	old_qp_state = qedr_get_ibqp_state(qp->state);
2107 	if (attr_mask & IB_QP_STATE)
2108 		new_qp_state = attr->qp_state;
2109 	else
2110 		new_qp_state = old_qp_state;
2111 
2112 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2113 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2114 					ibqp->qp_type, attr_mask)) {
2115 			DP_ERR(dev,
2116 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2117 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2118 			       attr_mask, qp->qp_id, ibqp->qp_type,
2119 			       old_qp_state, new_qp_state);
2120 			rc = -EINVAL;
2121 			goto err;
2122 		}
2123 	}
2124 
2125 	/* Translate the masks... */
2126 	if (attr_mask & IB_QP_STATE) {
2127 		SET_FIELD(qp_params.modify_flags,
2128 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2129 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2130 	}
2131 
2132 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2133 		qp_params.sqd_async = true;
2134 
2135 	if (attr_mask & IB_QP_PKEY_INDEX) {
2136 		SET_FIELD(qp_params.modify_flags,
2137 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2138 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2139 			rc = -EINVAL;
2140 			goto err;
2141 		}
2142 
2143 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2144 	}
2145 
2146 	if (attr_mask & IB_QP_QKEY)
2147 		qp->qkey = attr->qkey;
2148 
2149 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2150 		SET_FIELD(qp_params.modify_flags,
2151 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2152 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2153 						  IB_ACCESS_REMOTE_READ;
2154 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2155 						   IB_ACCESS_REMOTE_WRITE;
2156 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2157 					       IB_ACCESS_REMOTE_ATOMIC;
2158 	}
2159 
2160 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2161 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2162 			return -EINVAL;
2163 
2164 		if (attr_mask & IB_QP_PATH_MTU) {
2165 			if (attr->path_mtu < IB_MTU_256 ||
2166 			    attr->path_mtu > IB_MTU_4096) {
2167 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2168 				rc = -EINVAL;
2169 				goto err;
2170 			}
2171 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2172 				      ib_mtu_enum_to_int(iboe_get_mtu
2173 							 (dev->ndev->mtu)));
2174 		}
2175 
2176 		if (!qp->mtu) {
2177 			qp->mtu =
2178 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2179 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2180 		}
2181 
2182 		SET_FIELD(qp_params.modify_flags,
2183 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2184 
2185 		qp_params.traffic_class_tos = grh->traffic_class;
2186 		qp_params.flow_label = grh->flow_label;
2187 		qp_params.hop_limit_ttl = grh->hop_limit;
2188 
2189 		qp->sgid_idx = grh->sgid_index;
2190 
2191 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2192 		if (rc) {
2193 			DP_ERR(dev,
2194 			       "modify qp: problems with GID index %d (rc=%d)\n",
2195 			       grh->sgid_index, rc);
2196 			return rc;
2197 		}
2198 
2199 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2200 				   qp_params.remote_mac_addr);
2201 		if (rc)
2202 			return rc;
2203 
2204 		qp_params.use_local_mac = true;
2205 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2206 
2207 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2208 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2209 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2210 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2211 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2212 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2213 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2214 			 qp_params.remote_mac_addr);
2215 
2216 		qp_params.mtu = qp->mtu;
2217 		qp_params.lb_indication = false;
2218 	}
2219 
2220 	if (!qp_params.mtu) {
2221 		/* Stay with current MTU */
2222 		if (qp->mtu)
2223 			qp_params.mtu = qp->mtu;
2224 		else
2225 			qp_params.mtu =
2226 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2227 	}
2228 
2229 	if (attr_mask & IB_QP_TIMEOUT) {
2230 		SET_FIELD(qp_params.modify_flags,
2231 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2232 
2233 		/* The received timeout value is an exponent used like this:
2234 		 *    "12.7.34 LOCAL ACK TIMEOUT
2235 		 *    Value representing the transport (ACK) timeout for use by
2236 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2237 		 * The FW expects timeout in msec so we need to divide the usec
2238 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2239 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2240 		 * The value of zero means infinite so we use a 'max_t' to make
2241 		 * sure that sub 1 msec values will be configured as 1 msec.
2242 		 */
2243 		if (attr->timeout)
2244 			qp_params.ack_timeout =
2245 					1 << max_t(int, attr->timeout - 8, 0);
2246 		else
2247 			qp_params.ack_timeout = 0;
2248 	}
2249 
2250 	if (attr_mask & IB_QP_RETRY_CNT) {
2251 		SET_FIELD(qp_params.modify_flags,
2252 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2253 		qp_params.retry_cnt = attr->retry_cnt;
2254 	}
2255 
2256 	if (attr_mask & IB_QP_RNR_RETRY) {
2257 		SET_FIELD(qp_params.modify_flags,
2258 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2259 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2260 	}
2261 
2262 	if (attr_mask & IB_QP_RQ_PSN) {
2263 		SET_FIELD(qp_params.modify_flags,
2264 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2265 		qp_params.rq_psn = attr->rq_psn;
2266 		qp->rq_psn = attr->rq_psn;
2267 	}
2268 
2269 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2270 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2271 			rc = -EINVAL;
2272 			DP_ERR(dev,
2273 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2274 			       attr->max_rd_atomic,
2275 			       dev->attr.max_qp_req_rd_atomic_resc);
2276 			goto err;
2277 		}
2278 
2279 		SET_FIELD(qp_params.modify_flags,
2280 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2281 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2282 	}
2283 
2284 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2285 		SET_FIELD(qp_params.modify_flags,
2286 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2287 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2288 	}
2289 
2290 	if (attr_mask & IB_QP_SQ_PSN) {
2291 		SET_FIELD(qp_params.modify_flags,
2292 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2293 		qp_params.sq_psn = attr->sq_psn;
2294 		qp->sq_psn = attr->sq_psn;
2295 	}
2296 
2297 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2298 		if (attr->max_dest_rd_atomic >
2299 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2300 			DP_ERR(dev,
2301 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2302 			       attr->max_dest_rd_atomic,
2303 			       dev->attr.max_qp_resp_rd_atomic_resc);
2304 
2305 			rc = -EINVAL;
2306 			goto err;
2307 		}
2308 
2309 		SET_FIELD(qp_params.modify_flags,
2310 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2311 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2312 	}
2313 
2314 	if (attr_mask & IB_QP_DEST_QPN) {
2315 		SET_FIELD(qp_params.modify_flags,
2316 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2317 
2318 		qp_params.dest_qp = attr->dest_qp_num;
2319 		qp->dest_qp_num = attr->dest_qp_num;
2320 	}
2321 
2322 	cur_state = qp->state;
2323 
2324 	/* Update the QP state before the actual ramrod to prevent a race with
2325 	 * fast path. Modifying the QP state to error will cause the device to
2326 	 * flush the CQEs and while polling the flushed CQEs will considered as
2327 	 * a potential issue if the QP isn't in error state.
2328 	 */
2329 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2330 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2331 		qp->state = QED_ROCE_QP_STATE_ERR;
2332 
2333 	if (qp->qp_type != IB_QPT_GSI)
2334 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2335 					      qp->qed_qp, &qp_params);
2336 
2337 	if (attr_mask & IB_QP_STATE) {
2338 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2339 			rc = qedr_update_qp_state(dev, qp, cur_state,
2340 						  qp_params.new_state);
2341 		qp->state = qp_params.new_state;
2342 	}
2343 
2344 err:
2345 	return rc;
2346 }
2347 
2348 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2349 {
2350 	int ib_qp_acc_flags = 0;
2351 
2352 	if (params->incoming_rdma_write_en)
2353 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2354 	if (params->incoming_rdma_read_en)
2355 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2356 	if (params->incoming_atomic_en)
2357 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2358 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2359 	return ib_qp_acc_flags;
2360 }
2361 
2362 int qedr_query_qp(struct ib_qp *ibqp,
2363 		  struct ib_qp_attr *qp_attr,
2364 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2365 {
2366 	struct qed_rdma_query_qp_out_params params;
2367 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2368 	struct qedr_dev *dev = qp->dev;
2369 	int rc = 0;
2370 
2371 	memset(&params, 0, sizeof(params));
2372 
2373 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2374 	if (rc)
2375 		goto err;
2376 
2377 	memset(qp_attr, 0, sizeof(*qp_attr));
2378 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2379 
2380 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2381 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2382 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2383 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2384 	qp_attr->rq_psn = params.rq_psn;
2385 	qp_attr->sq_psn = params.sq_psn;
2386 	qp_attr->dest_qp_num = params.dest_qp;
2387 
2388 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2389 
2390 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2391 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2392 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2393 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2394 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2395 	qp_init_attr->cap = qp_attr->cap;
2396 
2397 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2398 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2399 			params.flow_label, qp->sgid_idx,
2400 			params.hop_limit_ttl, params.traffic_class_tos);
2401 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2402 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2403 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2404 	qp_attr->timeout = params.timeout;
2405 	qp_attr->rnr_retry = params.rnr_retry;
2406 	qp_attr->retry_cnt = params.retry_cnt;
2407 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2408 	qp_attr->pkey_index = params.pkey_index;
2409 	qp_attr->port_num = 1;
2410 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2411 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2412 	qp_attr->alt_pkey_index = 0;
2413 	qp_attr->alt_port_num = 0;
2414 	qp_attr->alt_timeout = 0;
2415 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2416 
2417 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2418 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2419 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2420 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2421 
2422 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2423 		 qp_attr->cap.max_inline_data);
2424 
2425 err:
2426 	return rc;
2427 }
2428 
2429 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2430 				  struct ib_udata *udata)
2431 {
2432 	int rc = 0;
2433 
2434 	if (qp->qp_type != IB_QPT_GSI) {
2435 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2436 		if (rc)
2437 			return rc;
2438 	}
2439 
2440 	if (udata)
2441 		qedr_cleanup_user(dev, qp);
2442 	else
2443 		qedr_cleanup_kernel(dev, qp);
2444 
2445 	return 0;
2446 }
2447 
2448 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2449 {
2450 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2451 	struct qedr_dev *dev = qp->dev;
2452 	struct ib_qp_attr attr;
2453 	int attr_mask = 0;
2454 	int rc = 0;
2455 
2456 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2457 		 qp, qp->qp_type);
2458 
2459 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2460 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2461 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2462 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2463 
2464 			attr.qp_state = IB_QPS_ERR;
2465 			attr_mask |= IB_QP_STATE;
2466 
2467 			/* Change the QP state to ERROR */
2468 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2469 		}
2470 	} else {
2471 		/* Wait for the connect/accept to complete */
2472 		if (qp->ep) {
2473 			int wait_count = 1;
2474 
2475 			while (qp->ep->during_connect) {
2476 				DP_DEBUG(dev, QEDR_MSG_QP,
2477 					 "Still in during connect/accept\n");
2478 
2479 				msleep(100);
2480 				if (wait_count++ > 200) {
2481 					DP_NOTICE(dev,
2482 						  "during connect timeout\n");
2483 					break;
2484 				}
2485 			}
2486 		}
2487 	}
2488 
2489 	if (qp->qp_type == IB_QPT_GSI)
2490 		qedr_destroy_gsi_qp(dev);
2491 
2492 	qedr_free_qp_resources(dev, qp, udata);
2493 
2494 	if (atomic_dec_and_test(&qp->refcnt) &&
2495 	    rdma_protocol_iwarp(&dev->ibdev, 1)) {
2496 		xa_erase_irq(&dev->qps, qp->qp_id);
2497 		kfree(qp);
2498 	}
2499 	return rc;
2500 }
2501 
2502 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
2503 		   struct ib_udata *udata)
2504 {
2505 	struct qedr_ah *ah = get_qedr_ah(ibah);
2506 
2507 	rdma_copy_ah_attr(&ah->attr, attr);
2508 
2509 	return 0;
2510 }
2511 
2512 void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2513 {
2514 	struct qedr_ah *ah = get_qedr_ah(ibah);
2515 
2516 	rdma_destroy_ah_attr(&ah->attr);
2517 }
2518 
2519 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2520 {
2521 	struct qedr_pbl *pbl, *tmp;
2522 
2523 	if (info->pbl_table)
2524 		list_add_tail(&info->pbl_table->list_entry,
2525 			      &info->free_pbl_list);
2526 
2527 	if (!list_empty(&info->inuse_pbl_list))
2528 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2529 
2530 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2531 		list_del(&pbl->list_entry);
2532 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2533 	}
2534 }
2535 
2536 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2537 			size_t page_list_len, bool two_layered)
2538 {
2539 	struct qedr_pbl *tmp;
2540 	int rc;
2541 
2542 	INIT_LIST_HEAD(&info->free_pbl_list);
2543 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2544 
2545 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2546 				  page_list_len, two_layered);
2547 	if (rc)
2548 		goto done;
2549 
2550 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2551 	if (IS_ERR(info->pbl_table)) {
2552 		rc = PTR_ERR(info->pbl_table);
2553 		goto done;
2554 	}
2555 
2556 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2557 		 &info->pbl_table->pa);
2558 
2559 	/* in usual case we use 2 PBLs, so we add one to free
2560 	 * list and allocating another one
2561 	 */
2562 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2563 	if (IS_ERR(tmp)) {
2564 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2565 		goto done;
2566 	}
2567 
2568 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2569 
2570 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2571 
2572 done:
2573 	if (rc)
2574 		free_mr_info(dev, info);
2575 
2576 	return rc;
2577 }
2578 
2579 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2580 			       u64 usr_addr, int acc, struct ib_udata *udata)
2581 {
2582 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2583 	struct qedr_mr *mr;
2584 	struct qedr_pd *pd;
2585 	int rc = -ENOMEM;
2586 
2587 	pd = get_qedr_pd(ibpd);
2588 	DP_DEBUG(dev, QEDR_MSG_MR,
2589 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2590 		 pd->pd_id, start, len, usr_addr, acc);
2591 
2592 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2593 		return ERR_PTR(-EINVAL);
2594 
2595 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2596 	if (!mr)
2597 		return ERR_PTR(rc);
2598 
2599 	mr->type = QEDR_MR_USER;
2600 
2601 	mr->umem = ib_umem_get(udata, start, len, acc, 0);
2602 	if (IS_ERR(mr->umem)) {
2603 		rc = -EFAULT;
2604 		goto err0;
2605 	}
2606 
2607 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2608 	if (rc)
2609 		goto err1;
2610 
2611 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2612 			   &mr->info.pbl_info, PAGE_SHIFT);
2613 
2614 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2615 	if (rc) {
2616 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2617 		goto err1;
2618 	}
2619 
2620 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2621 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2622 	mr->hw_mr.key = 0;
2623 	mr->hw_mr.pd = pd->pd_id;
2624 	mr->hw_mr.local_read = 1;
2625 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2626 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2627 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2628 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2629 	mr->hw_mr.mw_bind = false;
2630 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2631 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2632 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2633 	mr->hw_mr.page_size_log = PAGE_SHIFT;
2634 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2635 	mr->hw_mr.length = len;
2636 	mr->hw_mr.vaddr = usr_addr;
2637 	mr->hw_mr.zbva = false;
2638 	mr->hw_mr.phy_mr = false;
2639 	mr->hw_mr.dma_mr = false;
2640 
2641 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2642 	if (rc) {
2643 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2644 		goto err2;
2645 	}
2646 
2647 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2648 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2649 	    mr->hw_mr.remote_atomic)
2650 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2651 
2652 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2653 		 mr->ibmr.lkey);
2654 	return &mr->ibmr;
2655 
2656 err2:
2657 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2658 err1:
2659 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2660 err0:
2661 	kfree(mr);
2662 	return ERR_PTR(rc);
2663 }
2664 
2665 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2666 {
2667 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2668 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2669 	int rc = 0;
2670 
2671 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2672 	if (rc)
2673 		return rc;
2674 
2675 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2676 
2677 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2678 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2679 
2680 	/* it could be user registered memory. */
2681 	ib_umem_release(mr->umem);
2682 
2683 	kfree(mr);
2684 
2685 	return rc;
2686 }
2687 
2688 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2689 				       int max_page_list_len)
2690 {
2691 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2692 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2693 	struct qedr_mr *mr;
2694 	int rc = -ENOMEM;
2695 
2696 	DP_DEBUG(dev, QEDR_MSG_MR,
2697 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2698 		 max_page_list_len);
2699 
2700 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2701 	if (!mr)
2702 		return ERR_PTR(rc);
2703 
2704 	mr->dev = dev;
2705 	mr->type = QEDR_MR_FRMR;
2706 
2707 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2708 	if (rc)
2709 		goto err0;
2710 
2711 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2712 	if (rc) {
2713 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2714 		goto err0;
2715 	}
2716 
2717 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2718 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2719 	mr->hw_mr.key = 0;
2720 	mr->hw_mr.pd = pd->pd_id;
2721 	mr->hw_mr.local_read = 1;
2722 	mr->hw_mr.local_write = 0;
2723 	mr->hw_mr.remote_read = 0;
2724 	mr->hw_mr.remote_write = 0;
2725 	mr->hw_mr.remote_atomic = 0;
2726 	mr->hw_mr.mw_bind = false;
2727 	mr->hw_mr.pbl_ptr = 0;
2728 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2729 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2730 	mr->hw_mr.fbo = 0;
2731 	mr->hw_mr.length = 0;
2732 	mr->hw_mr.vaddr = 0;
2733 	mr->hw_mr.zbva = false;
2734 	mr->hw_mr.phy_mr = true;
2735 	mr->hw_mr.dma_mr = false;
2736 
2737 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2738 	if (rc) {
2739 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2740 		goto err1;
2741 	}
2742 
2743 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2744 	mr->ibmr.rkey = mr->ibmr.lkey;
2745 
2746 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2747 	return mr;
2748 
2749 err1:
2750 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2751 err0:
2752 	kfree(mr);
2753 	return ERR_PTR(rc);
2754 }
2755 
2756 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
2757 			    u32 max_num_sg, struct ib_udata *udata)
2758 {
2759 	struct qedr_mr *mr;
2760 
2761 	if (mr_type != IB_MR_TYPE_MEM_REG)
2762 		return ERR_PTR(-EINVAL);
2763 
2764 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2765 
2766 	if (IS_ERR(mr))
2767 		return ERR_PTR(-EINVAL);
2768 
2769 	return &mr->ibmr;
2770 }
2771 
2772 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2773 {
2774 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2775 	struct qedr_pbl *pbl_table;
2776 	struct regpair *pbe;
2777 	u32 pbes_in_page;
2778 
2779 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2780 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2781 		return -ENOMEM;
2782 	}
2783 
2784 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2785 		 mr->npages, addr);
2786 
2787 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2788 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2789 	pbe = (struct regpair *)pbl_table->va;
2790 	pbe +=  mr->npages % pbes_in_page;
2791 	pbe->lo = cpu_to_le32((u32)addr);
2792 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2793 
2794 	mr->npages++;
2795 
2796 	return 0;
2797 }
2798 
2799 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2800 {
2801 	int work = info->completed - info->completed_handled - 1;
2802 
2803 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2804 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2805 		struct qedr_pbl *pbl;
2806 
2807 		/* Free all the page list that are possible to be freed
2808 		 * (all the ones that were invalidated), under the assumption
2809 		 * that if an FMR was completed successfully that means that
2810 		 * if there was an invalidate operation before it also ended
2811 		 */
2812 		pbl = list_first_entry(&info->inuse_pbl_list,
2813 				       struct qedr_pbl, list_entry);
2814 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2815 		info->completed_handled++;
2816 	}
2817 }
2818 
2819 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2820 		   int sg_nents, unsigned int *sg_offset)
2821 {
2822 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2823 
2824 	mr->npages = 0;
2825 
2826 	handle_completed_mrs(mr->dev, &mr->info);
2827 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2828 }
2829 
2830 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2831 {
2832 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2833 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2834 	struct qedr_mr *mr;
2835 	int rc;
2836 
2837 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2838 	if (!mr)
2839 		return ERR_PTR(-ENOMEM);
2840 
2841 	mr->type = QEDR_MR_DMA;
2842 
2843 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2844 	if (rc) {
2845 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2846 		goto err1;
2847 	}
2848 
2849 	/* index only, 18 bit long, lkey = itid << 8 | key */
2850 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2851 	mr->hw_mr.pd = pd->pd_id;
2852 	mr->hw_mr.local_read = 1;
2853 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2854 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2855 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2856 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2857 	mr->hw_mr.dma_mr = true;
2858 
2859 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2860 	if (rc) {
2861 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2862 		goto err2;
2863 	}
2864 
2865 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2866 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2867 	    mr->hw_mr.remote_atomic)
2868 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2869 
2870 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2871 	return &mr->ibmr;
2872 
2873 err2:
2874 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2875 err1:
2876 	kfree(mr);
2877 	return ERR_PTR(rc);
2878 }
2879 
2880 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2881 {
2882 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2883 }
2884 
2885 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2886 {
2887 	int i, len = 0;
2888 
2889 	for (i = 0; i < num_sge; i++)
2890 		len += sg_list[i].length;
2891 
2892 	return len;
2893 }
2894 
2895 static void swap_wqe_data64(u64 *p)
2896 {
2897 	int i;
2898 
2899 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2900 		*p = cpu_to_be64(cpu_to_le64(*p));
2901 }
2902 
2903 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2904 				       struct qedr_qp *qp, u8 *wqe_size,
2905 				       const struct ib_send_wr *wr,
2906 				       const struct ib_send_wr **bad_wr,
2907 				       u8 *bits, u8 bit)
2908 {
2909 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2910 	char *seg_prt, *wqe;
2911 	int i, seg_siz;
2912 
2913 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2914 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2915 		*bad_wr = wr;
2916 		return 0;
2917 	}
2918 
2919 	if (!data_size)
2920 		return data_size;
2921 
2922 	*bits |= bit;
2923 
2924 	seg_prt = NULL;
2925 	wqe = NULL;
2926 	seg_siz = 0;
2927 
2928 	/* Copy data inline */
2929 	for (i = 0; i < wr->num_sge; i++) {
2930 		u32 len = wr->sg_list[i].length;
2931 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2932 
2933 		while (len > 0) {
2934 			u32 cur;
2935 
2936 			/* New segment required */
2937 			if (!seg_siz) {
2938 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2939 				seg_prt = wqe;
2940 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2941 				(*wqe_size)++;
2942 			}
2943 
2944 			/* Calculate currently allowed length */
2945 			cur = min_t(u32, len, seg_siz);
2946 			memcpy(seg_prt, src, cur);
2947 
2948 			/* Update segment variables */
2949 			seg_prt += cur;
2950 			seg_siz -= cur;
2951 
2952 			/* Update sge variables */
2953 			src += cur;
2954 			len -= cur;
2955 
2956 			/* Swap fully-completed segments */
2957 			if (!seg_siz)
2958 				swap_wqe_data64((u64 *)wqe);
2959 		}
2960 	}
2961 
2962 	/* swap last not completed segment */
2963 	if (seg_siz)
2964 		swap_wqe_data64((u64 *)wqe);
2965 
2966 	return data_size;
2967 }
2968 
/* Fill a receive-queue SGE: address as a little-endian register pair,
 * length and flags as little-endian 32-bit values. The pointer argument is
 * parenthesized at every use so that any pointer expression can be passed.
 */
#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->flags = cpu_to_le32(vflags);		\
	} while (0)

/* Fill an SRQ WQE header with the work request id and the SGE count. */
#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
	do {							\
		DMA_REGPAIR_LE((hdr)->wr_id, vwr_id);		\
		(hdr)->num_sges = (num_sge);			\
	} while (0)

/* Fill an SRQ SGE: address, length and local key, all little-endian. */
#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->l_key = cpu_to_le32(vlkey);		\
	} while (0)
2988 
2989 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2990 				const struct ib_send_wr *wr)
2991 {
2992 	u32 data_size = 0;
2993 	int i;
2994 
2995 	for (i = 0; i < wr->num_sge; i++) {
2996 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2997 
2998 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2999 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3000 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3001 		data_size += wr->sg_list[i].length;
3002 	}
3003 
3004 	if (wqe_size)
3005 		*wqe_size += wr->num_sge;
3006 
3007 	return data_size;
3008 }
3009 
3010 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3011 				     struct qedr_qp *qp,
3012 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3013 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3014 				     const struct ib_send_wr *wr,
3015 				     const struct ib_send_wr **bad_wr)
3016 {
3017 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3018 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3019 
3020 	if (wr->send_flags & IB_SEND_INLINE &&
3021 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3022 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3023 		u8 flags = 0;
3024 
3025 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3026 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3027 						   bad_wr, &rwqe->flags, flags);
3028 	}
3029 
3030 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3031 }
3032 
3033 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3034 				     struct qedr_qp *qp,
3035 				     struct rdma_sq_send_wqe_1st *swqe,
3036 				     struct rdma_sq_send_wqe_2st *swqe2,
3037 				     const struct ib_send_wr *wr,
3038 				     const struct ib_send_wr **bad_wr)
3039 {
3040 	memset(swqe2, 0, sizeof(*swqe2));
3041 	if (wr->send_flags & IB_SEND_INLINE) {
3042 		u8 flags = 0;
3043 
3044 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3045 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3046 						   bad_wr, &swqe->flags, flags);
3047 	}
3048 
3049 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3050 }
3051 
3052 static int qedr_prepare_reg(struct qedr_qp *qp,
3053 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3054 			    const struct ib_reg_wr *wr)
3055 {
3056 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3057 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3058 
3059 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3060 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3061 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3062 	fwqe1->l_key = wr->key;
3063 
3064 	fwqe2->access_ctrl = 0;
3065 
3066 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3067 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3068 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3069 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3070 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3071 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3072 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3073 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3074 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3075 	fwqe2->fmr_ctrl = 0;
3076 
3077 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3078 		   ilog2(mr->ibmr.page_size) - 12);
3079 
3080 	fwqe2->length_hi = 0;
3081 	fwqe2->length_lo = mr->ibmr.length;
3082 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3083 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3084 
3085 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3086 
3087 	return 0;
3088 }
3089 
3090 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3091 {
3092 	switch (opcode) {
3093 	case IB_WR_RDMA_WRITE:
3094 	case IB_WR_RDMA_WRITE_WITH_IMM:
3095 		return IB_WC_RDMA_WRITE;
3096 	case IB_WR_SEND_WITH_IMM:
3097 	case IB_WR_SEND:
3098 	case IB_WR_SEND_WITH_INV:
3099 		return IB_WC_SEND;
3100 	case IB_WR_RDMA_READ:
3101 	case IB_WR_RDMA_READ_WITH_INV:
3102 		return IB_WC_RDMA_READ;
3103 	case IB_WR_ATOMIC_CMP_AND_SWP:
3104 		return IB_WC_COMP_SWAP;
3105 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3106 		return IB_WC_FETCH_ADD;
3107 	case IB_WR_REG_MR:
3108 		return IB_WC_REG_MR;
3109 	case IB_WR_LOCAL_INV:
3110 		return IB_WC_LOCAL_INV;
3111 	default:
3112 		return IB_WC_SEND;
3113 	}
3114 }
3115 
3116 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3117 				      const struct ib_send_wr *wr)
3118 {
3119 	int wq_is_full, err_wr, pbl_is_full;
3120 	struct qedr_dev *dev = qp->dev;
3121 
3122 	/* prevent SQ overflow and/or processing of a bad WR */
3123 	err_wr = wr->num_sge > qp->sq.max_sges;
3124 	wq_is_full = qedr_wq_is_full(&qp->sq);
3125 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3126 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3127 	if (wq_is_full || err_wr || pbl_is_full) {
3128 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3129 			DP_ERR(dev,
3130 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3131 			       qp);
3132 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3133 		}
3134 
3135 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3136 			DP_ERR(dev,
3137 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3138 			       qp);
3139 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3140 		}
3141 
3142 		if (pbl_is_full &&
3143 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3144 			DP_ERR(dev,
3145 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3146 			       qp);
3147 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3148 		}
3149 		return false;
3150 	}
3151 	return true;
3152 }
3153 
3154 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3155 			    const struct ib_send_wr **bad_wr)
3156 {
3157 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3158 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3159 	struct rdma_sq_atomic_wqe_1st *awqe1;
3160 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3161 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3162 	struct rdma_sq_send_wqe_2st *swqe2;
3163 	struct rdma_sq_local_inv_wqe *iwqe;
3164 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3165 	struct rdma_sq_send_wqe_1st *swqe;
3166 	struct rdma_sq_rdma_wqe_1st *rwqe;
3167 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3168 	struct rdma_sq_common_wqe *wqe;
3169 	u32 length;
3170 	int rc = 0;
3171 	bool comp;
3172 
3173 	if (!qedr_can_post_send(qp, wr)) {
3174 		*bad_wr = wr;
3175 		return -ENOMEM;
3176 	}
3177 
3178 	wqe = qed_chain_produce(&qp->sq.pbl);
3179 	qp->wqe_wr_id[qp->sq.prod].signaled =
3180 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3181 
3182 	wqe->flags = 0;
3183 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3184 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3185 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3186 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3187 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3188 		   !!(wr->send_flags & IB_SEND_FENCE));
3189 	wqe->prev_wqe_size = qp->prev_wqe_size;
3190 
3191 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3192 
3193 	switch (wr->opcode) {
3194 	case IB_WR_SEND_WITH_IMM:
3195 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3196 			rc = -EINVAL;
3197 			*bad_wr = wr;
3198 			break;
3199 		}
3200 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3201 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3202 		swqe->wqe_size = 2;
3203 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3204 
3205 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3206 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3207 						   wr, bad_wr);
3208 		swqe->length = cpu_to_le32(length);
3209 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3210 		qp->prev_wqe_size = swqe->wqe_size;
3211 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3212 		break;
3213 	case IB_WR_SEND:
3214 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3215 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3216 
3217 		swqe->wqe_size = 2;
3218 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3219 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3220 						   wr, bad_wr);
3221 		swqe->length = cpu_to_le32(length);
3222 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3223 		qp->prev_wqe_size = swqe->wqe_size;
3224 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3225 		break;
3226 	case IB_WR_SEND_WITH_INV:
3227 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3228 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3229 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3230 		swqe->wqe_size = 2;
3231 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3232 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3233 						   wr, bad_wr);
3234 		swqe->length = cpu_to_le32(length);
3235 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3236 		qp->prev_wqe_size = swqe->wqe_size;
3237 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3238 		break;
3239 
3240 	case IB_WR_RDMA_WRITE_WITH_IMM:
3241 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3242 			rc = -EINVAL;
3243 			*bad_wr = wr;
3244 			break;
3245 		}
3246 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3247 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3248 
3249 		rwqe->wqe_size = 2;
3250 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3251 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3252 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3253 						   wr, bad_wr);
3254 		rwqe->length = cpu_to_le32(length);
3255 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3256 		qp->prev_wqe_size = rwqe->wqe_size;
3257 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3258 		break;
3259 	case IB_WR_RDMA_WRITE:
3260 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3261 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3262 
3263 		rwqe->wqe_size = 2;
3264 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3265 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3266 						   wr, bad_wr);
3267 		rwqe->length = cpu_to_le32(length);
3268 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3269 		qp->prev_wqe_size = rwqe->wqe_size;
3270 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3271 		break;
3272 	case IB_WR_RDMA_READ_WITH_INV:
3273 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3274 		/* fallthrough -- same is identical to RDMA READ */
3275 
3276 	case IB_WR_RDMA_READ:
3277 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3278 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3279 
3280 		rwqe->wqe_size = 2;
3281 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3282 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3283 						   wr, bad_wr);
3284 		rwqe->length = cpu_to_le32(length);
3285 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3286 		qp->prev_wqe_size = rwqe->wqe_size;
3287 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3288 		break;
3289 
3290 	case IB_WR_ATOMIC_CMP_AND_SWP:
3291 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3292 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3293 		awqe1->wqe_size = 4;
3294 
3295 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3296 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3297 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3298 
3299 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3300 
3301 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3302 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3303 			DMA_REGPAIR_LE(awqe3->swap_data,
3304 				       atomic_wr(wr)->compare_add);
3305 		} else {
3306 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3307 			DMA_REGPAIR_LE(awqe3->swap_data,
3308 				       atomic_wr(wr)->swap);
3309 			DMA_REGPAIR_LE(awqe3->cmp_data,
3310 				       atomic_wr(wr)->compare_add);
3311 		}
3312 
3313 		qedr_prepare_sq_sges(qp, NULL, wr);
3314 
3315 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3316 		qp->prev_wqe_size = awqe1->wqe_size;
3317 		break;
3318 
3319 	case IB_WR_LOCAL_INV:
3320 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3321 		iwqe->wqe_size = 1;
3322 
3323 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3324 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3325 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3326 		qp->prev_wqe_size = iwqe->wqe_size;
3327 		break;
3328 	case IB_WR_REG_MR:
3329 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3330 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3331 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3332 		fwqe1->wqe_size = 2;
3333 
3334 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3335 		if (rc) {
3336 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3337 			*bad_wr = wr;
3338 			break;
3339 		}
3340 
3341 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3342 		qp->prev_wqe_size = fwqe1->wqe_size;
3343 		break;
3344 	default:
3345 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3346 		rc = -EINVAL;
3347 		*bad_wr = wr;
3348 		break;
3349 	}
3350 
3351 	if (*bad_wr) {
3352 		u16 value;
3353 
3354 		/* Restore prod to its position before
3355 		 * this WR was processed
3356 		 */
3357 		value = le16_to_cpu(qp->sq.db_data.data.value);
3358 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3359 
3360 		/* Restore prev_wqe_size */
3361 		qp->prev_wqe_size = wqe->prev_wqe_size;
3362 		rc = -EINVAL;
3363 		DP_ERR(dev, "POST SEND FAILED\n");
3364 	}
3365 
3366 	return rc;
3367 }
3368 
3369 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3370 		   const struct ib_send_wr **bad_wr)
3371 {
3372 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3373 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3374 	unsigned long flags;
3375 	int rc = 0;
3376 
3377 	*bad_wr = NULL;
3378 
3379 	if (qp->qp_type == IB_QPT_GSI)
3380 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3381 
3382 	spin_lock_irqsave(&qp->q_lock, flags);
3383 
3384 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3385 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3386 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3387 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3388 			spin_unlock_irqrestore(&qp->q_lock, flags);
3389 			*bad_wr = wr;
3390 			DP_DEBUG(dev, QEDR_MSG_CQ,
3391 				 "QP in wrong state! QP icid=0x%x state %d\n",
3392 				 qp->icid, qp->state);
3393 			return -EINVAL;
3394 		}
3395 	}
3396 
3397 	while (wr) {
3398 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3399 		if (rc)
3400 			break;
3401 
3402 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3403 
3404 		qedr_inc_sw_prod(&qp->sq);
3405 
3406 		qp->sq.db_data.data.value++;
3407 
3408 		wr = wr->next;
3409 	}
3410 
3411 	/* Trigger doorbell
3412 	 * If there was a failure in the first WR then it will be triggered in
3413 	 * vane. However this is not harmful (as long as the producer value is
3414 	 * unchanged). For performance reasons we avoid checking for this
3415 	 * redundant doorbell.
3416 	 *
3417 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3418 	 * soon as we give the doorbell, we could get a completion
3419 	 * for this wr, therefore we need to make sure that the
3420 	 * memory is updated before giving the doorbell.
3421 	 * During qedr_poll_cq, rmb is called before accessing the
3422 	 * cqe. This covers for the smp_rmb as well.
3423 	 */
3424 	smp_wmb();
3425 	writel(qp->sq.db_data.raw, qp->sq.db);
3426 
3427 	spin_unlock_irqrestore(&qp->q_lock, flags);
3428 
3429 	return rc;
3430 }
3431 
3432 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3433 {
3434 	u32 used;
3435 
3436 	/* Calculate number of elements used based on producer
3437 	 * count and consumer count and subtract it from max
3438 	 * work request supported so that we get elements left.
3439 	 */
3440 	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3441 
3442 	return hw_srq->max_wr - used;
3443 }
3444 
3445 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3446 		       const struct ib_recv_wr **bad_wr)
3447 {
3448 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3449 	struct qedr_srq_hwq_info *hw_srq;
3450 	struct qedr_dev *dev = srq->dev;
3451 	struct qed_chain *pbl;
3452 	unsigned long flags;
3453 	int status = 0;
3454 	u32 num_sge;
3455 	u32 offset;
3456 
3457 	spin_lock_irqsave(&srq->lock, flags);
3458 
3459 	hw_srq = &srq->hw_srq;
3460 	pbl = &srq->hw_srq.pbl;
3461 	while (wr) {
3462 		struct rdma_srq_wqe_header *hdr;
3463 		int i;
3464 
3465 		if (!qedr_srq_elem_left(hw_srq) ||
3466 		    wr->num_sge > srq->hw_srq.max_sges) {
3467 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3468 			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3469 			       wr->num_sge, srq->hw_srq.max_sges);
3470 			status = -ENOMEM;
3471 			*bad_wr = wr;
3472 			break;
3473 		}
3474 
3475 		hdr = qed_chain_produce(pbl);
3476 		num_sge = wr->num_sge;
3477 		/* Set number of sge and work request id in header */
3478 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3479 
3480 		srq->hw_srq.wr_prod_cnt++;
3481 		hw_srq->wqe_prod++;
3482 		hw_srq->sge_prod++;
3483 
3484 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3485 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3486 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3487 
3488 		for (i = 0; i < wr->num_sge; i++) {
3489 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3490 
3491 			/* Set SGE length, lkey and address */
3492 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3493 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3494 
3495 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3496 				 "[%d]: len %d key %x addr %x:%x\n",
3497 				 i, srq_sge->length, srq_sge->l_key,
3498 				 srq_sge->addr.hi, srq_sge->addr.lo);
3499 			hw_srq->sge_prod++;
3500 		}
3501 
3502 		/* Flush WQE and SGE information before
3503 		 * updating producer.
3504 		 */
3505 		wmb();
3506 
3507 		/* SRQ producer is 8 bytes. Need to update SGE producer index
3508 		 * in first 4 bytes and need to update WQE producer in
3509 		 * next 4 bytes.
3510 		 */
3511 		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3512 		offset = offsetof(struct rdma_srq_producers, wqe_prod);
3513 		*((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
3514 			hw_srq->wqe_prod;
3515 
3516 		/* Flush producer after updating it. */
3517 		wmb();
3518 		wr = wr->next;
3519 	}
3520 
3521 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3522 		 qed_chain_get_elem_left(pbl));
3523 	spin_unlock_irqrestore(&srq->lock, flags);
3524 
3525 	return status;
3526 }
3527 
3528 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3529 		   const struct ib_recv_wr **bad_wr)
3530 {
3531 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3532 	struct qedr_dev *dev = qp->dev;
3533 	unsigned long flags;
3534 	int status = 0;
3535 
3536 	if (qp->qp_type == IB_QPT_GSI)
3537 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3538 
3539 	spin_lock_irqsave(&qp->q_lock, flags);
3540 
3541 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3542 		spin_unlock_irqrestore(&qp->q_lock, flags);
3543 		*bad_wr = wr;
3544 		return -EINVAL;
3545 	}
3546 
3547 	while (wr) {
3548 		int i;
3549 
3550 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3551 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3552 		    wr->num_sge > qp->rq.max_sges) {
3553 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3554 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3555 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3556 			       qp->rq.max_sges);
3557 			status = -ENOMEM;
3558 			*bad_wr = wr;
3559 			break;
3560 		}
3561 		for (i = 0; i < wr->num_sge; i++) {
3562 			u32 flags = 0;
3563 			struct rdma_rq_sge *rqe =
3564 			    qed_chain_produce(&qp->rq.pbl);
3565 
3566 			/* First one must include the number
3567 			 * of SGE in the list
3568 			 */
3569 			if (!i)
3570 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3571 					  wr->num_sge);
3572 
3573 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3574 				  wr->sg_list[i].lkey);
3575 
3576 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3577 				   wr->sg_list[i].length, flags);
3578 		}
3579 
3580 		/* Special case of no sges. FW requires between 1-4 sges...
3581 		 * in this case we need to post 1 sge with length zero. this is
3582 		 * because rdma write with immediate consumes an RQ.
3583 		 */
3584 		if (!wr->num_sge) {
3585 			u32 flags = 0;
3586 			struct rdma_rq_sge *rqe =
3587 			    qed_chain_produce(&qp->rq.pbl);
3588 
3589 			/* First one must include the number
3590 			 * of SGE in the list
3591 			 */
3592 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3593 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3594 
3595 			RQ_SGE_SET(rqe, 0, 0, flags);
3596 			i = 1;
3597 		}
3598 
3599 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3600 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3601 
3602 		qedr_inc_sw_prod(&qp->rq);
3603 
3604 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3605 		 * soon as we give the doorbell, we could get a completion
3606 		 * for this wr, therefore we need to make sure that the
3607 		 * memory is update before giving the doorbell.
3608 		 * During qedr_poll_cq, rmb is called before accessing the
3609 		 * cqe. This covers for the smp_rmb as well.
3610 		 */
3611 		smp_wmb();
3612 
3613 		qp->rq.db_data.data.value++;
3614 
3615 		writel(qp->rq.db_data.raw, qp->rq.db);
3616 
3617 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3618 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3619 		}
3620 
3621 		wr = wr->next;
3622 	}
3623 
3624 	spin_unlock_irqrestore(&qp->q_lock, flags);
3625 
3626 	return status;
3627 }
3628 
3629 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3630 {
3631 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3632 
3633 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3634 		cq->pbl_toggle;
3635 }
3636 
3637 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3638 {
3639 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3640 	struct qedr_qp *qp;
3641 
3642 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3643 						   resp_cqe->qp_handle.lo,
3644 						   u64);
3645 	return qp;
3646 }
3647 
3648 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3649 {
3650 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3651 
3652 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3653 }
3654 
3655 /* Return latest CQE (needs processing) */
3656 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3657 {
3658 	return cq->latest_cqe;
3659 }
3660 
3661 /* In fmr we need to increase the number of fmr completed counter for the fmr
3662  * algorithm determining whether we can free a pbl or not.
3663  * we need to perform this whether the work request was signaled or not. for
3664  * this purpose we call this function from the condition that checks if a wr
3665  * should be skipped, to make sure we don't miss it ( possibly this fmr
3666  * operation was not signalted)
3667  */
3668 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3669 {
3670 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3671 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3672 }
3673 
3674 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3675 		       struct qedr_cq *cq, int num_entries,
3676 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3677 		       int force)
3678 {
3679 	u16 cnt = 0;
3680 
3681 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3682 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3683 			qedr_chk_if_fmr(qp);
3684 			/* skip WC */
3685 			goto next_cqe;
3686 		}
3687 
3688 		/* fill WC */
3689 		wc->status = status;
3690 		wc->vendor_err = 0;
3691 		wc->wc_flags = 0;
3692 		wc->src_qp = qp->id;
3693 		wc->qp = &qp->ibqp;
3694 
3695 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3696 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3697 
3698 		switch (wc->opcode) {
3699 		case IB_WC_RDMA_WRITE:
3700 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3701 			break;
3702 		case IB_WC_COMP_SWAP:
3703 		case IB_WC_FETCH_ADD:
3704 			wc->byte_len = 8;
3705 			break;
3706 		case IB_WC_REG_MR:
3707 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3708 			break;
3709 		case IB_WC_RDMA_READ:
3710 		case IB_WC_SEND:
3711 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3712 			break;
3713 		default:
3714 			break;
3715 		}
3716 
3717 		num_entries--;
3718 		wc++;
3719 		cnt++;
3720 next_cqe:
3721 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3722 			qed_chain_consume(&qp->sq.pbl);
3723 		qedr_inc_sw_cons(&qp->sq);
3724 	}
3725 
3726 	return cnt;
3727 }
3728 
3729 static int qedr_poll_cq_req(struct qedr_dev *dev,
3730 			    struct qedr_qp *qp, struct qedr_cq *cq,
3731 			    int num_entries, struct ib_wc *wc,
3732 			    struct rdma_cqe_requester *req)
3733 {
3734 	int cnt = 0;
3735 
3736 	switch (req->status) {
3737 	case RDMA_CQE_REQ_STS_OK:
3738 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3739 				  IB_WC_SUCCESS, 0);
3740 		break;
3741 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3742 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3743 			DP_DEBUG(dev, QEDR_MSG_CQ,
3744 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3745 				 cq->icid, qp->icid);
3746 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3747 				  IB_WC_WR_FLUSH_ERR, 1);
3748 		break;
3749 	default:
3750 		/* process all WQE before the cosumer */
3751 		qp->state = QED_ROCE_QP_STATE_ERR;
3752 		cnt = process_req(dev, qp, cq, num_entries, wc,
3753 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3754 		wc += cnt;
3755 		/* if we have extra WC fill it with actual error info */
3756 		if (cnt < num_entries) {
3757 			enum ib_wc_status wc_status;
3758 
3759 			switch (req->status) {
3760 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3761 				DP_ERR(dev,
3762 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3763 				       cq->icid, qp->icid);
3764 				wc_status = IB_WC_BAD_RESP_ERR;
3765 				break;
3766 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3767 				DP_ERR(dev,
3768 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3769 				       cq->icid, qp->icid);
3770 				wc_status = IB_WC_LOC_LEN_ERR;
3771 				break;
3772 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3773 				DP_ERR(dev,
3774 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3775 				       cq->icid, qp->icid);
3776 				wc_status = IB_WC_LOC_QP_OP_ERR;
3777 				break;
3778 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3779 				DP_ERR(dev,
3780 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3781 				       cq->icid, qp->icid);
3782 				wc_status = IB_WC_LOC_PROT_ERR;
3783 				break;
3784 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3785 				DP_ERR(dev,
3786 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3787 				       cq->icid, qp->icid);
3788 				wc_status = IB_WC_MW_BIND_ERR;
3789 				break;
3790 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3791 				DP_ERR(dev,
3792 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3793 				       cq->icid, qp->icid);
3794 				wc_status = IB_WC_REM_INV_REQ_ERR;
3795 				break;
3796 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3797 				DP_ERR(dev,
3798 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3799 				       cq->icid, qp->icid);
3800 				wc_status = IB_WC_REM_ACCESS_ERR;
3801 				break;
3802 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3803 				DP_ERR(dev,
3804 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3805 				       cq->icid, qp->icid);
3806 				wc_status = IB_WC_REM_OP_ERR;
3807 				break;
3808 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3809 				DP_ERR(dev,
3810 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3811 				       cq->icid, qp->icid);
3812 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3813 				break;
3814 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3815 				DP_ERR(dev,
3816 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3817 				       cq->icid, qp->icid);
3818 				wc_status = IB_WC_RETRY_EXC_ERR;
3819 				break;
3820 			default:
3821 				DP_ERR(dev,
3822 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3823 				       cq->icid, qp->icid);
3824 				wc_status = IB_WC_GENERAL_ERR;
3825 			}
3826 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3827 					   wc_status, 1);
3828 		}
3829 	}
3830 
3831 	return cnt;
3832 }
3833 
3834 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3835 {
3836 	switch (status) {
3837 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3838 		return IB_WC_LOC_ACCESS_ERR;
3839 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3840 		return IB_WC_LOC_LEN_ERR;
3841 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3842 		return IB_WC_LOC_QP_OP_ERR;
3843 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3844 		return IB_WC_LOC_PROT_ERR;
3845 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3846 		return IB_WC_MW_BIND_ERR;
3847 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3848 		return IB_WC_REM_INV_RD_REQ_ERR;
3849 	case RDMA_CQE_RESP_STS_OK:
3850 		return IB_WC_SUCCESS;
3851 	default:
3852 		return IB_WC_GENERAL_ERR;
3853 	}
3854 }
3855 
3856 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3857 					  struct ib_wc *wc)
3858 {
3859 	wc->status = IB_WC_SUCCESS;
3860 	wc->byte_len = le32_to_cpu(resp->length);
3861 
3862 	if (resp->flags & QEDR_RESP_IMM) {
3863 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3864 		wc->wc_flags |= IB_WC_WITH_IMM;
3865 
3866 		if (resp->flags & QEDR_RESP_RDMA)
3867 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3868 
3869 		if (resp->flags & QEDR_RESP_INV)
3870 			return -EINVAL;
3871 
3872 	} else if (resp->flags & QEDR_RESP_INV) {
3873 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3874 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3875 
3876 		if (resp->flags & QEDR_RESP_RDMA)
3877 			return -EINVAL;
3878 
3879 	} else if (resp->flags & QEDR_RESP_RDMA) {
3880 		return -EINVAL;
3881 	}
3882 
3883 	return 0;
3884 }
3885 
3886 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3887 			       struct qedr_cq *cq, struct ib_wc *wc,
3888 			       struct rdma_cqe_responder *resp, u64 wr_id)
3889 {
3890 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3891 	wc->opcode = IB_WC_RECV;
3892 	wc->wc_flags = 0;
3893 
3894 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3895 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3896 			DP_ERR(dev,
3897 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3898 			       cq, cq->icid, resp->flags);
3899 
3900 	} else {
3901 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3902 		if (wc->status == IB_WC_GENERAL_ERR)
3903 			DP_ERR(dev,
3904 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3905 			       cq, cq->icid, resp->status);
3906 	}
3907 
3908 	/* Fill the rest of the WC */
3909 	wc->vendor_err = 0;
3910 	wc->src_qp = qp->id;
3911 	wc->qp = &qp->ibqp;
3912 	wc->wr_id = wr_id;
3913 }
3914 
3915 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
3916 				struct qedr_cq *cq, struct ib_wc *wc,
3917 				struct rdma_cqe_responder *resp)
3918 {
3919 	struct qedr_srq *srq = qp->srq;
3920 	u64 wr_id;
3921 
3922 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
3923 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
3924 
3925 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3926 		wc->status = IB_WC_WR_FLUSH_ERR;
3927 		wc->vendor_err = 0;
3928 		wc->wr_id = wr_id;
3929 		wc->byte_len = 0;
3930 		wc->src_qp = qp->id;
3931 		wc->qp = &qp->ibqp;
3932 		wc->wr_id = wr_id;
3933 	} else {
3934 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3935 	}
3936 	srq->hw_srq.wr_cons_cnt++;
3937 
3938 	return 1;
3939 }
3940 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3941 			    struct qedr_cq *cq, struct ib_wc *wc,
3942 			    struct rdma_cqe_responder *resp)
3943 {
3944 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3945 
3946 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3947 
3948 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3949 		qed_chain_consume(&qp->rq.pbl);
3950 	qedr_inc_sw_cons(&qp->rq);
3951 
3952 	return 1;
3953 }
3954 
3955 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3956 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3957 {
3958 	u16 cnt = 0;
3959 
3960 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3961 		/* fill WC */
3962 		wc->status = IB_WC_WR_FLUSH_ERR;
3963 		wc->vendor_err = 0;
3964 		wc->wc_flags = 0;
3965 		wc->src_qp = qp->id;
3966 		wc->byte_len = 0;
3967 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3968 		wc->qp = &qp->ibqp;
3969 		num_entries--;
3970 		wc++;
3971 		cnt++;
3972 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3973 			qed_chain_consume(&qp->rq.pbl);
3974 		qedr_inc_sw_cons(&qp->rq);
3975 	}
3976 
3977 	return cnt;
3978 }
3979 
3980 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3981 				 struct rdma_cqe_responder *resp, int *update)
3982 {
3983 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
3984 		consume_cqe(cq);
3985 		*update |= 1;
3986 	}
3987 }
3988 
/* Handle one responder CQE for an SRQ-attached QP: report it through
 * process_resp_one_srq() and consume the CQE. Returns the number of work
 * completions written.
 */
static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
				 struct qedr_cq *cq, int num_entries,
				 struct ib_wc *wc,
				 struct rdma_cqe_responder *resp)
{
	int reported = process_resp_one_srq(dev, qp, cq, wc, resp);

	consume_cqe(cq);

	return reported;
}
4001 
4002 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4003 			     struct qedr_cq *cq, int num_entries,
4004 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4005 			     int *update)
4006 {
4007 	int cnt;
4008 
4009 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4010 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4011 					 resp->rq_cons_or_srq_id);
4012 		try_consume_resp_cqe(cq, qp, resp, update);
4013 	} else {
4014 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4015 		consume_cqe(cq);
4016 		*update |= 1;
4017 	}
4018 
4019 	return cnt;
4020 }
4021 
4022 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4023 				struct rdma_cqe_requester *req, int *update)
4024 {
4025 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4026 		consume_cqe(cq);
4027 		*update |= 1;
4028 	}
4029 }
4030 
4031 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4032 {
4033 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4034 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4035 	union rdma_cqe *cqe;
4036 	u32 old_cons, new_cons;
4037 	unsigned long flags;
4038 	int update = 0;
4039 	int done = 0;
4040 
4041 	if (cq->destroyed) {
4042 		DP_ERR(dev,
4043 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4044 		       cq, cq->icid);
4045 		return 0;
4046 	}
4047 
4048 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4049 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4050 
4051 	spin_lock_irqsave(&cq->cq_lock, flags);
4052 	cqe = cq->latest_cqe;
4053 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4054 	while (num_entries && is_valid_cqe(cq, cqe)) {
4055 		struct qedr_qp *qp;
4056 		int cnt = 0;
4057 
4058 		/* prevent speculative reads of any field of CQE */
4059 		rmb();
4060 
4061 		qp = cqe_get_qp(cqe);
4062 		if (!qp) {
4063 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4064 			break;
4065 		}
4066 
4067 		wc->qp = &qp->ibqp;
4068 
4069 		switch (cqe_get_type(cqe)) {
4070 		case RDMA_CQE_TYPE_REQUESTER:
4071 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4072 					       &cqe->req);
4073 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4074 			break;
4075 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4076 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4077 						&cqe->resp, &update);
4078 			break;
4079 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4080 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4081 						    wc, &cqe->resp);
4082 			update = 1;
4083 			break;
4084 		case RDMA_CQE_TYPE_INVALID:
4085 		default:
4086 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4087 			       cqe_get_type(cqe));
4088 		}
4089 		num_entries -= cnt;
4090 		wc += cnt;
4091 		done += cnt;
4092 
4093 		cqe = get_cqe(cq);
4094 	}
4095 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4096 
4097 	cq->cq_cons += new_cons - old_cons;
4098 
4099 	if (update)
4100 		/* doorbell notifies abount latest VALID entry,
4101 		 * but chain already point to the next INVALID one
4102 		 */
4103 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4104 
4105 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4106 	return done;
4107 }
4108 
4109 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4110 		     u8 port_num,
4111 		     const struct ib_wc *in_wc,
4112 		     const struct ib_grh *in_grh,
4113 		     const struct ib_mad_hdr *mad_hdr,
4114 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
4115 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4116 {
4117 	struct qedr_dev *dev = get_qedr_dev(ibdev);
4118 
4119 	DP_DEBUG(dev, QEDR_MSG_GSI,
4120 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
4121 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
4122 		 mad_hdr->class_specific, mad_hdr->class_version,
4123 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
4124 	return IB_MAD_RESULT_SUCCESS;
4125 }
4126