xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 83946783)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
55 
56 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
57 #define	RDMA_MAX_SGE_PER_SRQ	(4)
58 #define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
59 
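/* Shift a DQ_PWM_OFFSET_* value into the PWM address field to form a
 * doorbell offset within the DPI or doorbell BAR.
 */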
60 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
61 
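/* Keys stored in the rdma_user_mmap entries: QEDR_USER_MMAP_IO_WC marks a
 * write-combined doorbell BAR mapping, QEDR_USER_MMAP_PHYS_PAGE marks a
 * kernel page holding doorbell recovery data (see qedr_mmap() and
 * qedr_mmap_free()).
 */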
62 enum {
63 	QEDR_USER_MMAP_IO_WC = 0,
64 	QEDR_USER_MMAP_PHYS_PAGE,
65 };
66 
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
68 					size_t len)
69 {
70 	size_t min_len = min_t(size_t, len, udata->outlen);
71 
72 	return ib_copy_to_udata(udata, src, min_len);
73 }
74 
75 int qedr_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
76 {
77 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
78 		return -EINVAL;
79 
80 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
81 	return 0;
82 }
83 
84 int qedr_iw_query_gid(struct ib_device *ibdev, u32 port,
85 		      int index, union ib_gid *sgid)
86 {
87 	struct qedr_dev *dev = get_qedr_dev(ibdev);
88 
89 	memset(sgid->raw, 0, sizeof(sgid->raw));
90 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
91 
92 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 		 sgid->global.interface_id, sgid->global.subnet_prefix);
94 
95 	return 0;
96 }
97 
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
99 {
100 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 	struct qedr_device_attr *qattr = &dev->attr;
102 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
103 
104 	srq_attr->srq_limit = srq->srq_limit;
105 	srq_attr->max_wr = qattr->max_srq_wr;
106 	srq_attr->max_sge = qattr->max_sge;
107 
108 	return 0;
109 }
110 
111 int qedr_query_device(struct ib_device *ibdev,
112 		      struct ib_device_attr *attr, struct ib_udata *udata)
113 {
114 	struct qedr_dev *dev = get_qedr_dev(ibdev);
115 	struct qedr_device_attr *qattr = &dev->attr;
116 
117 	if (!dev->rdma_ctx) {
118 		DP_ERR(dev,
119 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
120 		       dev->rdma_ctx);
121 		return -EINVAL;
122 	}
123 
124 	memset(attr, 0, sizeof(*attr));
125 
126 	attr->fw_ver = qattr->fw_ver;
127 	attr->sys_image_guid = qattr->sys_image_guid;
128 	attr->max_mr_size = qattr->max_mr_size;
129 	attr->page_size_cap = qattr->page_size_caps;
130 	attr->vendor_id = qattr->vendor_id;
131 	attr->vendor_part_id = qattr->vendor_part_id;
132 	attr->hw_ver = qattr->hw_ver;
133 	attr->max_qp = qattr->max_qp;
134 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 	    IB_DEVICE_RC_RNR_NAK_GEN |
137 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
138 
139 	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
140 		attr->device_cap_flags |= IB_DEVICE_XRC;
141 	attr->max_send_sge = qattr->max_sge;
142 	attr->max_recv_sge = qattr->max_sge;
143 	attr->max_sge_rd = qattr->max_sge;
144 	attr->max_cq = qattr->max_cq;
145 	attr->max_cqe = qattr->max_cqe;
146 	attr->max_mr = qattr->max_mr;
147 	attr->max_mw = qattr->max_mw;
148 	attr->max_pd = qattr->max_pd;
149 	attr->atomic_cap = dev->atomic_cap;
150 	attr->max_qp_init_rd_atom =
151 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
152 	attr->max_qp_rd_atom =
153 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
154 		attr->max_qp_init_rd_atom);
155 
156 	attr->max_srq = qattr->max_srq;
157 	attr->max_srq_sge = qattr->max_srq_sge;
158 	attr->max_srq_wr = qattr->max_srq_wr;
159 
160 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
161 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
162 	attr->max_pkeys = qattr->max_pkey;
163 	attr->max_ah = qattr->max_ah;
164 
165 	return 0;
166 }
167 
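/* Translate an Ethernet link speed in Mbps into the closest IB speed/width
 * pair reported through ib_port_attr.
 */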
168 static inline void get_link_speed_and_width(int speed, u16 *ib_speed,
169 					    u8 *ib_width)
170 {
171 	switch (speed) {
172 	case 1000:
173 		*ib_speed = IB_SPEED_SDR;
174 		*ib_width = IB_WIDTH_1X;
175 		break;
176 	case 10000:
177 		*ib_speed = IB_SPEED_QDR;
178 		*ib_width = IB_WIDTH_1X;
179 		break;
180 
181 	case 20000:
182 		*ib_speed = IB_SPEED_DDR;
183 		*ib_width = IB_WIDTH_4X;
184 		break;
185 
186 	case 25000:
187 		*ib_speed = IB_SPEED_EDR;
188 		*ib_width = IB_WIDTH_1X;
189 		break;
190 
191 	case 40000:
192 		*ib_speed = IB_SPEED_QDR;
193 		*ib_width = IB_WIDTH_4X;
194 		break;
195 
196 	case 50000:
197 		*ib_speed = IB_SPEED_HDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 100000:
202 		*ib_speed = IB_SPEED_EDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	default:
207 		/* Unsupported */
208 		*ib_speed = IB_SPEED_SDR;
209 		*ib_width = IB_WIDTH_1X;
210 	}
211 }
212 
213 int qedr_query_port(struct ib_device *ibdev, u32 port,
214 		    struct ib_port_attr *attr)
215 {
216 	struct qedr_dev *dev;
217 	struct qed_rdma_port *rdma_port;
218 
219 	dev = get_qedr_dev(ibdev);
220 
221 	if (!dev->rdma_ctx) {
222 		DP_ERR(dev, "rdma_ctx is NULL\n");
223 		return -EINVAL;
224 	}
225 
226 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
227 
228 	/* *attr is zeroed by the caller; avoid zeroing it here */
229 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
230 		attr->state = IB_PORT_ACTIVE;
231 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
232 	} else {
233 		attr->state = IB_PORT_DOWN;
234 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
235 	}
236 	attr->max_mtu = IB_MTU_4096;
237 	attr->lid = 0;
238 	attr->lmc = 0;
239 	attr->sm_lid = 0;
240 	attr->sm_sl = 0;
241 	attr->ip_gids = true;
242 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
243 		attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu);
244 		attr->gid_tbl_len = 1;
245 	} else {
246 		attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
247 		attr->gid_tbl_len = QEDR_MAX_SGID;
248 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
249 	}
250 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
251 	attr->qkey_viol_cntr = 0;
252 	get_link_speed_and_width(rdma_port->link_speed,
253 				 &attr->active_speed, &attr->active_width);
254 	attr->max_msg_sz = rdma_port->max_msg_size;
255 	attr->max_vl_num = 4;
256 
257 	return 0;
258 }
259 
260 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
261 {
262 	struct ib_device *ibdev = uctx->device;
263 	int rc;
264 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
265 	struct qedr_alloc_ucontext_resp uresp = {};
266 	struct qedr_alloc_ucontext_req ureq = {};
267 	struct qedr_dev *dev = get_qedr_dev(ibdev);
268 	struct qed_rdma_add_user_out_params oparams;
269 	struct qedr_user_mmap_entry *entry;
270 
271 	if (!udata)
272 		return -EFAULT;
273 
274 	if (udata->inlen) {
275 		rc = ib_copy_from_udata(&ureq, udata,
276 					min(sizeof(ureq), udata->inlen));
277 		if (rc) {
278 			DP_ERR(dev, "Problem copying data from user space\n");
279 			return -EFAULT;
280 		}
281 		ctx->edpm_mode = !!(ureq.context_flags &
282 				    QEDR_ALLOC_UCTX_EDPM_MODE);
283 		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
284 	}
285 
286 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
287 	if (rc) {
288 		DP_ERR(dev,
289 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
290 		       rc);
291 		return rc;
292 	}
293 
294 	ctx->dpi = oparams.dpi;
295 	ctx->dpi_addr = oparams.dpi_addr;
296 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
297 	ctx->dpi_size = oparams.dpi_size;
298 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
299 	if (!entry) {
300 		rc = -ENOMEM;
301 		goto err;
302 	}
303 
304 	entry->io_address = ctx->dpi_phys_addr;
305 	entry->length = ctx->dpi_size;
306 	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
307 	entry->dpi = ctx->dpi;
308 	entry->dev = dev;
309 	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
310 					 ctx->dpi_size);
311 	if (rc) {
312 		kfree(entry);
313 		goto err;
314 	}
315 	ctx->db_mmap_entry = &entry->rdma_entry;
316 
317 	if (!dev->user_dpm_enabled)
318 		uresp.dpm_flags = 0;
319 	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
320 		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
321 	else
322 		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
323 				  QEDR_DPM_TYPE_ROCE_LEGACY |
324 				  QEDR_DPM_TYPE_ROCE_EDPM_MODE;
325 
326 	if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
327 		uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
328 		uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
329 		uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
330 		uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
331 	}
332 
333 	uresp.wids_enabled = 1;
334 	uresp.wid_count = oparams.wid_count;
335 	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
336 	uresp.db_size = ctx->dpi_size;
337 	uresp.max_send_wr = dev->attr.max_sqe;
338 	uresp.max_recv_wr = dev->attr.max_rqe;
339 	uresp.max_srq_wr = dev->attr.max_srq_wr;
340 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
341 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
342 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
343 	uresp.max_cqes = QEDR_MAX_CQES;
344 
345 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
346 	if (rc)
347 		goto err;
348 
349 	ctx->dev = dev;
350 
351 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
352 		 &ctx->ibucontext);
353 	return 0;
354 
355 err:
356 	if (!ctx->db_mmap_entry)
357 		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
358 	else
359 		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
360 
361 	return rc;
362 }
363 
364 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
365 {
366 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
367 
368 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
369 		 uctx);
370 
371 	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
372 }
373 
374 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
375 {
376 	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
377 	struct qedr_dev *dev = entry->dev;
378 
379 	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
380 		free_page((unsigned long)entry->address);
381 	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
382 		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
383 
384 	kfree(entry);
385 }
386 
387 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
388 {
389 	struct ib_device *dev = ucontext->device;
390 	size_t length = vma->vm_end - vma->vm_start;
391 	struct rdma_user_mmap_entry *rdma_entry;
392 	struct qedr_user_mmap_entry *entry;
393 	int rc = 0;
394 	u64 pfn;
395 
396 	ibdev_dbg(dev,
397 		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
398 		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
399 
400 	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
401 	if (!rdma_entry) {
402 		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
403 			  vma->vm_pgoff);
404 		return -EINVAL;
405 	}
406 	entry = get_qedr_mmap_entry(rdma_entry);
407 	ibdev_dbg(dev,
408 		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
409 		  entry->io_address, length, entry->mmap_flag);
410 
411 	switch (entry->mmap_flag) {
412 	case QEDR_USER_MMAP_IO_WC:
413 		pfn = entry->io_address >> PAGE_SHIFT;
414 		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
415 				       pgprot_writecombine(vma->vm_page_prot),
416 				       rdma_entry);
417 		break;
418 	case QEDR_USER_MMAP_PHYS_PAGE:
419 		rc = vm_insert_page(vma, vma->vm_start,
420 				    virt_to_page(entry->address));
421 		break;
422 	default:
423 		rc = -EINVAL;
424 	}
425 
426 	if (rc)
427 		ibdev_dbg(dev,
428 			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
429 			  entry->io_address, length, entry->mmap_flag, rc);
430 
431 	rdma_user_mmap_entry_put(rdma_entry);
432 	return rc;
433 }
434 
435 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
436 {
437 	struct ib_device *ibdev = ibpd->device;
438 	struct qedr_dev *dev = get_qedr_dev(ibdev);
439 	struct qedr_pd *pd = get_qedr_pd(ibpd);
440 	u16 pd_id;
441 	int rc;
442 
443 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
444 		 udata ? "User Lib" : "Kernel");
445 
446 	if (!dev->rdma_ctx) {
447 		DP_ERR(dev, "invalid RDMA context\n");
448 		return -EINVAL;
449 	}
450 
451 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
452 	if (rc)
453 		return rc;
454 
455 	pd->pd_id = pd_id;
456 
457 	if (udata) {
458 		struct qedr_alloc_pd_uresp uresp = {
459 			.pd_id = pd_id,
460 		};
461 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
462 			udata, struct qedr_ucontext, ibucontext);
463 
464 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
465 		if (rc) {
466 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
467 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
468 			return rc;
469 		}
470 
471 		pd->uctx = context;
472 		pd->uctx->pd = pd;
473 	}
474 
475 	return 0;
476 }
477 
478 int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
479 {
480 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
481 	struct qedr_pd *pd = get_qedr_pd(ibpd);
482 
483 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
484 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
485 	return 0;
486 }
487 
488 
489 int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
490 {
491 	struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
492 	struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd);
493 
494 	return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id);
495 }
496 
497 int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
498 {
499 	struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
500 	u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id;
501 
502 	dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id);
503 	return 0;
504 }

505 static void qedr_free_pbl(struct qedr_dev *dev,
506 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
507 {
508 	struct pci_dev *pdev = dev->pdev;
509 	int i;
510 
511 	for (i = 0; i < pbl_info->num_pbls; i++) {
512 		if (!pbl[i].va)
513 			continue;
514 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
515 				  pbl[i].va, pbl[i].pa);
516 	}
517 
518 	kfree(pbl);
519 }
520 
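/* FW PBL pages are between 4KB and 64KB. Each PBL page holds 64-bit page
 * addresses (PBEs), so a two-layer PBL can describe at most
 * MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE pages.
 */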
521 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
522 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
523 
524 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
525 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
526 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
527 
528 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
529 					   struct qedr_pbl_info *pbl_info,
530 					   gfp_t flags)
531 {
532 	struct pci_dev *pdev = dev->pdev;
533 	struct qedr_pbl *pbl_table;
534 	dma_addr_t *pbl_main_tbl;
535 	dma_addr_t pa;
536 	void *va;
537 	int i;
538 
539 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
540 	if (!pbl_table)
541 		return ERR_PTR(-ENOMEM);
542 
543 	for (i = 0; i < pbl_info->num_pbls; i++) {
544 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
545 					flags);
546 		if (!va)
547 			goto err;
548 
549 		pbl_table[i].va = va;
550 		pbl_table[i].pa = pa;
551 	}
552 
553 	/* Two-layer PBLs: if we have more than one PBL, initialize the first
554 	 * one with physical pointers to all of the rest.
555 	 */
556 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
557 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
558 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
559 
560 	return pbl_table;
561 
562 err:
563 	for (i--; i >= 0; i--)
564 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
565 				  pbl_table[i].va, pbl_table[i].pa);
566 
567 	qedr_free_pbl(dev, pbl_info, pbl_table);
568 
569 	return ERR_PTR(-ENOMEM);
570 }
571 
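/* Decide between a single-layer and a two-layer PBL for the given number of
 * PBEs and compute the resulting PBL page size and page count.
 */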
572 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
573 				struct qedr_pbl_info *pbl_info,
574 				u32 num_pbes, int two_layer_capable)
575 {
576 	u32 pbl_capacity;
577 	u32 pbl_size;
578 	u32 num_pbls;
579 
580 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
581 		if (num_pbes > MAX_PBES_TWO_LAYER) {
582 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
583 			       num_pbes);
584 			return -EINVAL;
585 		}
586 
587 		/* calculate required pbl page size */
588 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
589 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
590 			       NUM_PBES_ON_PAGE(pbl_size);
591 
592 		while (pbl_capacity < num_pbes) {
593 			pbl_size *= 2;
594 			pbl_capacity = pbl_size / sizeof(u64);
595 			pbl_capacity = pbl_capacity * pbl_capacity;
596 		}
597 
598 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
599 		num_pbls++;	/* One for layer 0, which points to the PBLs */
600 		pbl_info->two_layered = true;
601 	} else {
602 		/* One layered PBL */
603 		num_pbls = 1;
604 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
605 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
606 		pbl_info->two_layered = false;
607 	}
608 
609 	pbl_info->num_pbls = num_pbls;
610 	pbl_info->pbl_size = pbl_size;
611 	pbl_info->num_pbes = num_pbes;
612 
613 	DP_DEBUG(dev, QEDR_MSG_MR,
614 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
615 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
616 
617 	return 0;
618 }
619 
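/* Walk the umem in pg_shift-sized DMA blocks and write each block address as
 * a 64-bit PBE into the PBL pages, skipping the layer-0 page when the PBL is
 * two-layered.
 */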
620 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
621 			       struct qedr_pbl *pbl,
622 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
623 {
624 	int pbe_cnt, total_num_pbes = 0;
625 	struct qedr_pbl *pbl_tbl;
626 	struct ib_block_iter biter;
627 	struct regpair *pbe;
628 
629 	if (!pbl_info->num_pbes)
630 		return;
631 
632 	/* If we have a two-layered PBL, the first PBL points to the rest of
633 	 * the PBLs, and the first entry lies in the second PBL of the table.
634 	 */
635 	if (pbl_info->two_layered)
636 		pbl_tbl = &pbl[1];
637 	else
638 		pbl_tbl = pbl;
639 
640 	pbe = (struct regpair *)pbl_tbl->va;
641 	if (!pbe) {
642 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
643 		return;
644 	}
645 
646 	pbe_cnt = 0;
647 
648 	rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) {
649 		u64 pg_addr = rdma_block_iter_dma_address(&biter);
650 
651 		pbe->lo = cpu_to_le32(pg_addr);
652 		pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
653 
654 		pbe_cnt++;
655 		total_num_pbes++;
656 		pbe++;
657 
658 		if (total_num_pbes == pbl_info->num_pbes)
659 			return;
660 
661 		/* If the current PBL is full of PBEs, move on to the next PBL.
662 		 */
663 		if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
664 			pbl_tbl++;
665 			pbe = (struct regpair *)pbl_tbl->va;
666 			pbe_cnt = 0;
667 		}
668 	}
669 }
670 
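/* Register a doorbell address/data pair with the qed doorbell recovery
 * mechanism. A NULL db_data means the user library predates doorbell
 * recovery, in which case registration is silently skipped.
 */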
671 static int qedr_db_recovery_add(struct qedr_dev *dev,
672 				void __iomem *db_addr,
673 				void *db_data,
674 				enum qed_db_rec_width db_width,
675 				enum qed_db_rec_space db_space)
676 {
677 	if (!db_data) {
678 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
679 		return 0;
680 	}
681 
682 	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
683 						 db_width, db_space);
684 }
685 
686 static void qedr_db_recovery_del(struct qedr_dev *dev,
687 				 void __iomem *db_addr,
688 				 void *db_data)
689 {
690 	if (!db_data) {
691 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
692 		return;
693 	}
694 
695 	/* Ignore return code as there is not much we can do about it. Error
696 	 * log will be printed inside.
697 	 */
698 	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
699 }
700 
701 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
702 			      struct qedr_cq *cq, struct ib_udata *udata,
703 			      u32 db_offset)
704 {
705 	struct qedr_create_cq_uresp uresp;
706 	int rc;
707 
708 	memset(&uresp, 0, sizeof(uresp));
709 
710 	uresp.db_offset = db_offset;
711 	uresp.icid = cq->icid;
712 	if (cq->q.db_mmap_entry)
713 		uresp.db_rec_addr =
714 			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
715 
716 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
717 	if (rc)
718 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
719 
720 	return rc;
721 }
722 
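/* Advance the CQ consumer; when it wraps past the last CQE in the chain,
 * flip the toggle bit expected on the next pass over the ring.
 */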
723 static void consume_cqe(struct qedr_cq *cq)
724 {
725 	if (cq->latest_cqe == cq->toggle_cqe)
726 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
727 
728 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
729 }
730 
731 static inline int qedr_align_cq_entries(int entries)
732 {
733 	u64 size, aligned_size;
734 
735 	/* We allocate an extra entry that we don't report to the FW. */
736 	size = (entries + 1) * QEDR_CQE_SIZE;
737 	aligned_size = ALIGN(size, PAGE_SIZE);
738 
739 	return aligned_size / QEDR_CQE_SIZE;
740 }
741 
742 static int qedr_init_user_db_rec(struct ib_udata *udata,
743 				 struct qedr_dev *dev, struct qedr_userq *q,
744 				 bool requires_db_rec)
745 {
746 	struct qedr_ucontext *uctx =
747 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
748 					  ibucontext);
749 	struct qedr_user_mmap_entry *entry;
750 	int rc;
751 
752 	/* Abort for a non-doorbell user queue (SRQ) or a non-supporting lib */
753 	if (requires_db_rec == 0 || !uctx->db_rec)
754 		return 0;
755 
756 	/* Allocate a page for doorbell recovery, add to mmap */
757 	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
758 	if (!q->db_rec_data) {
759 		DP_ERR(dev, "get_zeroed_page failed\n");
760 		return -ENOMEM;
761 	}
762 
763 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
764 	if (!entry)
765 		goto err_free_db_data;
766 
767 	entry->address = q->db_rec_data;
768 	entry->length = PAGE_SIZE;
769 	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
770 	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
771 					 &entry->rdma_entry,
772 					 PAGE_SIZE);
773 	if (rc)
774 		goto err_free_entry;
775 
776 	q->db_mmap_entry = &entry->rdma_entry;
777 
778 	return 0;
779 
780 err_free_entry:
781 	kfree(entry);
782 
783 err_free_db_data:
784 	free_page((unsigned long)q->db_rec_data);
785 	q->db_rec_data = NULL;
786 	return -ENOMEM;
787 }
788 
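/* Pin the user buffer and build the PBL that describes it in FW page units.
 * When alloc_and_init is set, the PBL pages are allocated and populated here;
 * otherwise only a placeholder table is allocated and it is filled in later
 * (iWARP path). Optionally sets up the doorbell recovery page as well.
 */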
789 static inline int qedr_init_user_queue(struct ib_udata *udata,
790 				       struct qedr_dev *dev,
791 				       struct qedr_userq *q, u64 buf_addr,
792 				       size_t buf_len, bool requires_db_rec,
793 				       int access,
794 				       int alloc_and_init)
795 {
796 	u32 fw_pages;
797 	int rc;
798 
799 	q->buf_addr = buf_addr;
800 	q->buf_len = buf_len;
801 	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
802 	if (IS_ERR(q->umem)) {
803 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
804 		       PTR_ERR(q->umem));
805 		return PTR_ERR(q->umem);
806 	}
807 
808 	fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
809 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
810 	if (rc)
811 		goto err0;
812 
813 	if (alloc_and_init) {
814 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
815 		if (IS_ERR(q->pbl_tbl)) {
816 			rc = PTR_ERR(q->pbl_tbl);
817 			goto err0;
818 		}
819 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
820 				   FW_PAGE_SHIFT);
821 	} else {
822 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
823 		if (!q->pbl_tbl) {
824 			rc = -ENOMEM;
825 			goto err0;
826 		}
827 	}
828 
829 	/* mmap the user address used to store doorbell data for recovery */
830 	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
831 
832 err0:
833 	ib_umem_release(q->umem);
834 	q->umem = NULL;
835 
836 	return rc;
837 }
838 
839 static inline void qedr_init_cq_params(struct qedr_cq *cq,
840 				       struct qedr_ucontext *ctx,
841 				       struct qedr_dev *dev, int vector,
842 				       int chain_entries, int page_cnt,
843 				       u64 pbl_ptr,
844 				       struct qed_rdma_create_cq_in_params
845 				       *params)
846 {
847 	memset(params, 0, sizeof(*params));
848 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
849 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
850 	params->cnq_id = vector;
851 	params->cq_size = chain_entries - 1;
852 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
853 	params->pbl_num_pages = page_cnt;
854 	params->pbl_ptr = pbl_ptr;
855 	params->pbl_two_level = 0;
856 }
857 
858 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
859 {
860 	cq->db.data.agg_flags = flags;
861 	cq->db.data.value = cpu_to_le32(cons);
862 	writeq(cq->db.raw, cq->db_addr);
863 }
864 
865 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
866 {
867 	struct qedr_cq *cq = get_qedr_cq(ibcq);
868 	unsigned long sflags;
869 	struct qedr_dev *dev;
870 
871 	dev = get_qedr_dev(ibcq->device);
872 
873 	if (cq->destroyed) {
874 		DP_ERR(dev,
875 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
876 		       cq, cq->icid);
877 		return -EINVAL;
878 	}
879 
880 
881 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
882 		return 0;
883 
884 	spin_lock_irqsave(&cq->cq_lock, sflags);
885 
886 	cq->arm_flags = 0;
887 
888 	if (flags & IB_CQ_SOLICITED)
889 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
890 
891 	if (flags & IB_CQ_NEXT_COMP)
892 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
893 
894 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
895 
896 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
897 
898 	return 0;
899 }
900 
901 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
902 		   struct ib_udata *udata)
903 {
904 	struct ib_device *ibdev = ibcq->device;
905 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
906 		udata, struct qedr_ucontext, ibucontext);
907 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
908 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
909 	struct qed_chain_init_params chain_params = {
910 		.mode		= QED_CHAIN_MODE_PBL,
911 		.intended_use	= QED_CHAIN_USE_TO_CONSUME,
912 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
913 		.elem_size	= sizeof(union rdma_cqe),
914 	};
915 	struct qedr_dev *dev = get_qedr_dev(ibdev);
916 	struct qed_rdma_create_cq_in_params params;
917 	struct qedr_create_cq_ureq ureq = {};
918 	int vector = attr->comp_vector;
919 	int entries = attr->cqe;
920 	struct qedr_cq *cq = get_qedr_cq(ibcq);
921 	int chain_entries;
922 	u32 db_offset;
923 	int page_cnt;
924 	u64 pbl_ptr;
925 	u16 icid;
926 	int rc;
927 
928 	DP_DEBUG(dev, QEDR_MSG_INIT,
929 		 "create_cq: called from %s. entries=%d, vector=%d\n",
930 		 udata ? "User Lib" : "Kernel", entries, vector);
931 
932 	if (attr->flags)
933 		return -EOPNOTSUPP;
934 
935 	if (entries > QEDR_MAX_CQES) {
936 		DP_ERR(dev,
937 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
938 		       entries, QEDR_MAX_CQES);
939 		return -EINVAL;
940 	}
941 
942 	chain_entries = qedr_align_cq_entries(entries);
943 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
944 	chain_params.num_elems = chain_entries;
945 
946 	/* calc db offset. user will add DPI base, kernel will add db addr */
947 	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
948 
949 	if (udata) {
950 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
951 							 udata->inlen))) {
952 			DP_ERR(dev,
953 			       "create cq: problem copying data from user space\n");
954 			goto err0;
955 		}
956 
957 		if (!ureq.len) {
958 			DP_ERR(dev,
959 			       "create cq: cannot create a cq with 0 entries\n");
960 			goto err0;
961 		}
962 
963 		cq->cq_type = QEDR_CQ_TYPE_USER;
964 
965 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
966 					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
967 					  1);
968 		if (rc)
969 			goto err0;
970 
971 		pbl_ptr = cq->q.pbl_tbl->pa;
972 		page_cnt = cq->q.pbl_info.num_pbes;
973 
974 		cq->ibcq.cqe = chain_entries;
975 		cq->q.db_addr = ctx->dpi_addr + db_offset;
976 	} else {
977 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
978 
979 		rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
980 						   &chain_params);
981 		if (rc)
982 			goto err0;
983 
984 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
985 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
986 		cq->ibcq.cqe = cq->pbl.capacity;
987 	}
988 
989 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
990 			    pbl_ptr, &params);
991 
992 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
993 	if (rc)
994 		goto err1;
995 
996 	cq->icid = icid;
997 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
998 	spin_lock_init(&cq->cq_lock);
999 
1000 	if (udata) {
1001 		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
1002 		if (rc)
1003 			goto err2;
1004 
1005 		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
1006 					  &cq->q.db_rec_data->db_data,
1007 					  DB_REC_WIDTH_64B,
1008 					  DB_REC_USER);
1009 		if (rc)
1010 			goto err2;
1011 
1012 	} else {
1013 		/* Generate doorbell address. */
1014 		cq->db.data.icid = cq->icid;
1015 		cq->db_addr = dev->db_addr + db_offset;
1016 		cq->db.data.params = DB_AGG_CMD_MAX <<
1017 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1018 
1019 		/* Point to the very last element; once we pass it, we toggle. */
1020 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1021 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1022 		cq->latest_cqe = NULL;
1023 		consume_cqe(cq);
1024 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1025 
1026 		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1027 					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
1028 		if (rc)
1029 			goto err2;
1030 	}
1031 
1032 	DP_DEBUG(dev, QEDR_MSG_CQ,
1033 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1034 		 cq->icid, cq, params.cq_size);
1035 
1036 	return 0;
1037 
1038 err2:
1039 	destroy_iparams.icid = cq->icid;
1040 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1041 				  &destroy_oparams);
1042 err1:
1043 	if (udata) {
1044 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1045 		ib_umem_release(cq->q.umem);
1046 		if (cq->q.db_mmap_entry)
1047 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1048 	} else {
1049 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1050 	}
1051 err0:
1052 	return -EINVAL;
1053 }
1054 
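/* Polling parameters used by qedr_destroy_cq() while waiting for outstanding
 * CNQ notifications: the iteration count and the per-iteration delay (used as
 * microseconds in the busy-wait pass and milliseconds in the sleeping pass).
 */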
1055 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1056 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1057 
1058 int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1059 {
1060 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1061 	struct qed_rdma_destroy_cq_out_params oparams;
1062 	struct qed_rdma_destroy_cq_in_params iparams;
1063 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1064 	int iter;
1065 
1066 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1067 
1068 	cq->destroyed = 1;
1069 
1070 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1071 	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1072 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1073 		return 0;
1074 	}
1075 
1076 	iparams.icid = cq->icid;
1077 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1078 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1079 
1080 	if (udata) {
1081 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1082 		ib_umem_release(cq->q.umem);
1083 
1084 		if (cq->q.db_rec_data) {
1085 			qedr_db_recovery_del(dev, cq->q.db_addr,
1086 					     &cq->q.db_rec_data->db_data);
1087 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1088 		}
1089 	} else {
1090 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1091 	}
1092 
1093 	/* We don't want the IRQ handler to handle a non-existing CQ, so we
1094 	 * wait until all CNQ interrupts, if any, are received. This will
1095 	 * always happen, and very fast; if it does not, a serious error has
1096 	 * occurred, which is why we can afford a long delay.
1097 	 * We spin for a short time so we don't lose time on context switching
1098 	 * in case all the completions are handled in that span. Otherwise
1099 	 * we sleep for a while and check again. Since the CNQ may be
1100 	 * associated with (only) the current CPU, we use msleep to allow the
1101 	 * current CPU to be freed.
1102 	 * The CNQ notification counter is increased in qedr_irq_handler().
1103 	 */
1104 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1105 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1106 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1107 		iter--;
1108 	}
1109 
1110 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1111 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1112 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1113 		iter--;
1114 	}
1115 
1116 	/* Note that we don't need to have explicit code to wait for the
1117 	 * completion of the event handler because it is invoked from the EQ.
1118 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1119 	 * be certain that there's no event handler in process.
1120 	 */
1121 	return 0;
1122 }
1123 
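/* Fill the qed modify-QP parameters (SGID/DGID, VLAN id and RoCE mode) from
 * the GID entry referenced by the AH attribute. The GID dwords are converted
 * with ntohl(), and a VLAN id of VLAN_CFI_MASK or above is treated as
 * "no VLAN".
 */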
1124 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1125 					  struct ib_qp_attr *attr,
1126 					  int attr_mask,
1127 					  struct qed_rdma_modify_qp_in_params
1128 					  *qp_params)
1129 {
1130 	const struct ib_gid_attr *gid_attr;
1131 	enum rdma_network_type nw_type;
1132 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1133 	u32 ipv4_addr;
1134 	int ret;
1135 	int i;
1136 
1137 	gid_attr = grh->sgid_attr;
1138 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1139 	if (ret)
1140 		return ret;
1141 
1142 	nw_type = rdma_gid_attr_network_type(gid_attr);
1143 	switch (nw_type) {
1144 	case RDMA_NETWORK_IPV6:
1145 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1146 		       sizeof(qp_params->sgid));
1147 		memcpy(&qp_params->dgid.bytes[0],
1148 		       &grh->dgid,
1149 		       sizeof(qp_params->dgid));
1150 		qp_params->roce_mode = ROCE_V2_IPV6;
1151 		SET_FIELD(qp_params->modify_flags,
1152 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1153 		break;
1154 	case RDMA_NETWORK_ROCE_V1:
1155 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1156 		       sizeof(qp_params->sgid));
1157 		memcpy(&qp_params->dgid.bytes[0],
1158 		       &grh->dgid,
1159 		       sizeof(qp_params->dgid));
1160 		qp_params->roce_mode = ROCE_V1;
1161 		break;
1162 	case RDMA_NETWORK_IPV4:
1163 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1164 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1165 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1166 		qp_params->sgid.ipv4_addr = ipv4_addr;
1167 		ipv4_addr =
1168 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1169 		qp_params->dgid.ipv4_addr = ipv4_addr;
1170 		SET_FIELD(qp_params->modify_flags,
1171 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1172 		qp_params->roce_mode = ROCE_V2_IPV4;
1173 		break;
1174 	default:
1175 		return -EINVAL;
1176 	}
1177 
1178 	for (i = 0; i < 4; i++) {
1179 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1180 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1181 	}
1182 
1183 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1184 		qp_params->vlan_id = 0;
1185 
1186 	return 0;
1187 }
1188 
1189 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1190 			       struct ib_qp_init_attr *attrs,
1191 			       struct ib_udata *udata)
1192 {
1193 	struct qedr_device_attr *qattr = &dev->attr;
1194 
1195 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1196 	if (attrs->qp_type != IB_QPT_RC &&
1197 	    attrs->qp_type != IB_QPT_GSI &&
1198 	    attrs->qp_type != IB_QPT_XRC_INI &&
1199 	    attrs->qp_type != IB_QPT_XRC_TGT) {
1200 		DP_DEBUG(dev, QEDR_MSG_QP,
1201 			 "create qp: unsupported qp type=0x%x requested\n",
1202 			 attrs->qp_type);
1203 		return -EOPNOTSUPP;
1204 	}
1205 
1206 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1207 		DP_ERR(dev,
1208 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1209 		       attrs->cap.max_send_wr, qattr->max_sqe);
1210 		return -EINVAL;
1211 	}
1212 
1213 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1214 		DP_ERR(dev,
1215 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1216 		       attrs->cap.max_inline_data, qattr->max_inline);
1217 		return -EINVAL;
1218 	}
1219 
1220 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1221 		DP_ERR(dev,
1222 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1223 		       attrs->cap.max_send_sge, qattr->max_sge);
1224 		return -EINVAL;
1225 	}
1226 
1227 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1228 		DP_ERR(dev,
1229 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1230 		       attrs->cap.max_recv_sge, qattr->max_sge);
1231 		return -EINVAL;
1232 	}
1233 
1234 	/* Verify that consumer QPs are not trying to use the GSI QP's CQs.
1235 	 * A TGT QP isn't associated with an RQ/SQ.
1236 	 */
1237 	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
1238 	    (attrs->qp_type != IB_QPT_XRC_TGT) &&
1239 	    (attrs->qp_type != IB_QPT_XRC_INI)) {
1240 		struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
1241 		struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
1242 
1243 		if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) ||
1244 		    (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) {
1245 			DP_ERR(dev,
1246 			       "create qp: consumer QP cannot use GSI CQs.\n");
1247 			return -EINVAL;
1248 		}
1249 	}
1250 
1251 	return 0;
1252 }
1253 
1254 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1255 			       struct qedr_srq *srq, struct ib_udata *udata)
1256 {
1257 	struct qedr_create_srq_uresp uresp = {};
1258 	int rc;
1259 
1260 	uresp.srq_id = srq->srq_id;
1261 
1262 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1263 	if (rc)
1264 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1265 
1266 	return rc;
1267 }
1268 
1269 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1270 			       struct qedr_create_qp_uresp *uresp,
1271 			       struct qedr_qp *qp)
1272 {
1273 	/* iWARP requires two doorbells per RQ. */
1274 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1275 		uresp->rq_db_offset =
1276 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1277 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1278 	} else {
1279 		uresp->rq_db_offset =
1280 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1281 	}
1282 
1283 	uresp->rq_icid = qp->icid;
1284 	if (qp->urq.db_mmap_entry)
1285 		uresp->rq_db_rec_addr =
1286 			rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
1287 }
1288 
1289 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1290 			       struct qedr_create_qp_uresp *uresp,
1291 			       struct qedr_qp *qp)
1292 {
1293 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1294 
1295 	/* iWARP uses the same cid for rq and sq */
1296 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1297 		uresp->sq_icid = qp->icid;
1298 	else
1299 		uresp->sq_icid = qp->icid + 1;
1300 
1301 	if (qp->usq.db_mmap_entry)
1302 		uresp->sq_db_rec_addr =
1303 			rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
1304 }
1305 
1306 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1307 			      struct qedr_qp *qp, struct ib_udata *udata,
1308 			      struct qedr_create_qp_uresp *uresp)
1309 {
1310 	int rc;
1311 
1312 	memset(uresp, 0, sizeof(*uresp));
1313 
1314 	if (qedr_qp_has_sq(qp))
1315 		qedr_copy_sq_uresp(dev, uresp, qp);
1316 
1317 	if (qedr_qp_has_rq(qp))
1318 		qedr_copy_rq_uresp(dev, uresp, qp);
1319 
1320 	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1321 	uresp->qp_id = qp->qp_id;
1322 
1323 	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1324 	if (rc)
1325 		DP_ERR(dev,
1326 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1327 		       qp->icid);
1328 
1329 	return rc;
1330 }
1331 
1332 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1333 {
1334 	qed_chain_reset(&qph->pbl);
1335 	qph->prod = 0;
1336 	qph->cons = 0;
1337 	qph->wqe_cons = 0;
1338 	qph->db_data.data.value = cpu_to_le16(0);
1339 }
1340 
1341 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1342 				      struct qedr_qp *qp,
1343 				      struct qedr_pd *pd,
1344 				      struct ib_qp_init_attr *attrs)
1345 {
1346 	spin_lock_init(&qp->q_lock);
1347 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1348 		kref_init(&qp->refcnt);
1349 		init_completion(&qp->iwarp_cm_comp);
1350 		init_completion(&qp->qp_rel_comp);
1351 	}
1352 
1353 	qp->pd = pd;
1354 	qp->qp_type = attrs->qp_type;
1355 	qp->max_inline_data = attrs->cap.max_inline_data;
1356 	qp->state = QED_ROCE_QP_STATE_RESET;
1357 
1358 	qp->prev_wqe_size = 0;
1359 
1360 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1361 	qp->dev = dev;
1362 	if (qedr_qp_has_sq(qp)) {
1363 		qedr_reset_qp_hwq_info(&qp->sq);
1364 		qp->sq.max_sges = attrs->cap.max_send_sge;
1365 		qp->sq_cq = get_qedr_cq(attrs->send_cq);
1366 		DP_DEBUG(dev, QEDR_MSG_QP,
1367 			 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1368 			 qp->sq.max_sges, qp->sq_cq->icid);
1369 	}
1370 
1371 	if (attrs->srq)
1372 		qp->srq = get_qedr_srq(attrs->srq);
1373 
1374 	if (qedr_qp_has_rq(qp)) {
1375 		qedr_reset_qp_hwq_info(&qp->rq);
1376 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1377 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1378 		DP_DEBUG(dev, QEDR_MSG_QP,
1379 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1380 			 qp->rq.max_sges, qp->rq_cq->icid);
1381 	}
1382 
1383 	DP_DEBUG(dev, QEDR_MSG_QP,
1384 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1385 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1386 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1387 	DP_DEBUG(dev, QEDR_MSG_QP,
1388 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1389 		 qp->sq.max_sges, qp->sq_cq->icid);
1390 }
1391 
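/* Kernel RoCE QPs: derive the SQ/RQ doorbell addresses from the doorbell BAR
 * and register them for doorbell recovery, undoing the SQ registration if the
 * RQ registration fails.
 */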
1392 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1393 {
1394 	int rc = 0;
1395 
1396 	if (qedr_qp_has_sq(qp)) {
1397 		qp->sq.db = dev->db_addr +
1398 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1399 		qp->sq.db_data.data.icid = qp->icid + 1;
1400 		rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data,
1401 					  DB_REC_WIDTH_32B, DB_REC_KERNEL);
1402 		if (rc)
1403 			return rc;
1404 	}
1405 
1406 	if (qedr_qp_has_rq(qp)) {
1407 		qp->rq.db = dev->db_addr +
1408 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1409 		qp->rq.db_data.data.icid = qp->icid;
1410 		rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data,
1411 					  DB_REC_WIDTH_32B, DB_REC_KERNEL);
1412 		if (rc && qedr_qp_has_sq(qp))
1413 			qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
1414 	}
1415 
1416 	return rc;
1417 }
1418 
1419 static int qedr_check_srq_params(struct qedr_dev *dev,
1420 				 struct ib_srq_init_attr *attrs,
1421 				 struct ib_udata *udata)
1422 {
1423 	struct qedr_device_attr *qattr = &dev->attr;
1424 
1425 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1426 		DP_ERR(dev,
1427 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1428 		       attrs->attr.max_wr, qattr->max_srq_wr);
1429 		return -EINVAL;
1430 	}
1431 
1432 	if (attrs->attr.max_sge > qattr->max_sge) {
1433 		DP_ERR(dev,
1434 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1435 		       attrs->attr.max_sge, qattr->max_sge);
1436 	}
1437 
1438 	if (!udata && attrs->srq_type == IB_SRQT_XRC) {
1439 		DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n");
1440 		return -EINVAL;
1441 	}
1442 
1443 	return 0;
1444 }
1445 
1446 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1447 {
1448 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1449 	ib_umem_release(srq->usrq.umem);
1450 	ib_umem_release(srq->prod_umem);
1451 }
1452 
1453 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1454 {
1455 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1456 	struct qedr_dev *dev = srq->dev;
1457 
1458 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1459 
1460 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1461 			  hw_srq->virt_prod_pair_addr,
1462 			  hw_srq->phy_prod_pair_addr);
1463 }
1464 
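/* User SRQ: pin the WQE buffer like any other user queue and additionally pin
 * the producer-pair structure so that its DMA address can be passed to qed as
 * the SRQ producer-pair address.
 */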
1465 static int qedr_init_srq_user_params(struct ib_udata *udata,
1466 				     struct qedr_srq *srq,
1467 				     struct qedr_create_srq_ureq *ureq,
1468 				     int access)
1469 {
1470 	struct scatterlist *sg;
1471 	int rc;
1472 
1473 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1474 				  ureq->srq_len, false, access, 1);
1475 	if (rc)
1476 		return rc;
1477 
1478 	srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
1479 				     sizeof(struct rdma_srq_producers), access);
1480 	if (IS_ERR(srq->prod_umem)) {
1481 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1482 		ib_umem_release(srq->usrq.umem);
1483 		DP_ERR(srq->dev,
1484 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1485 		       PTR_ERR(srq->prod_umem));
1486 		return PTR_ERR(srq->prod_umem);
1487 	}
1488 
1489 	sg = srq->prod_umem->sgt_append.sgt.sgl;
1490 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1491 
1492 	return 0;
1493 }
1494 
1495 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1496 					struct qedr_dev *dev,
1497 					struct ib_srq_init_attr *init_attr)
1498 {
1499 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1500 	struct qed_chain_init_params params = {
1501 		.mode		= QED_CHAIN_MODE_PBL,
1502 		.intended_use	= QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1503 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1504 		.elem_size	= QEDR_SRQ_WQE_ELEM_SIZE,
1505 	};
1506 	dma_addr_t phy_prod_pair_addr;
1507 	u32 num_elems;
1508 	void *va;
1509 	int rc;
1510 
1511 	va = dma_alloc_coherent(&dev->pdev->dev,
1512 				sizeof(struct rdma_srq_producers),
1513 				&phy_prod_pair_addr, GFP_KERNEL);
1514 	if (!va) {
1515 		DP_ERR(dev,
1516 		       "create srq: failed to allocate dma memory for producer\n");
1517 		return -ENOMEM;
1518 	}
1519 
1520 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1521 	hw_srq->virt_prod_pair_addr = va;
1522 
1523 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1524 	params.num_elems = num_elems;
1525 
1526 	rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
1527 	if (rc)
1528 		goto err0;
1529 
1530 	hw_srq->num_elems = num_elems;
1531 
1532 	return 0;
1533 
1534 err0:
1535 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1536 			  va, phy_prod_pair_addr);
1537 	return rc;
1538 }
1539 
1540 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1541 		    struct ib_udata *udata)
1542 {
1543 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1544 	struct qed_rdma_create_srq_in_params in_params = {};
1545 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1546 	struct qed_rdma_create_srq_out_params out_params;
1547 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1548 	struct qedr_create_srq_ureq ureq = {};
1549 	u64 pbl_base_addr, phy_prod_pair_addr;
1550 	struct qedr_srq_hwq_info *hw_srq;
1551 	u32 page_cnt, page_size;
1552 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1553 	int rc = 0;
1554 
1555 	DP_DEBUG(dev, QEDR_MSG_QP,
1556 		 "create SRQ called from %s (pd %p)\n",
1557 		 (udata) ? "User lib" : "kernel", pd);
1558 
1559 	if (init_attr->srq_type != IB_SRQT_BASIC &&
1560 	    init_attr->srq_type != IB_SRQT_XRC)
1561 		return -EOPNOTSUPP;
1562 
1563 	rc = qedr_check_srq_params(dev, init_attr, udata);
1564 	if (rc)
1565 		return -EINVAL;
1566 
1567 	srq->dev = dev;
1568 	srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
1569 	hw_srq = &srq->hw_srq;
1570 	spin_lock_init(&srq->lock);
1571 
1572 	hw_srq->max_wr = init_attr->attr.max_wr;
1573 	hw_srq->max_sges = init_attr->attr.max_sge;
1574 
1575 	if (udata) {
1576 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1577 							 udata->inlen))) {
1578 			DP_ERR(dev,
1579 			       "create srq: problem copying data from user space\n");
1580 			goto err0;
1581 		}
1582 
1583 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
1584 		if (rc)
1585 			goto err0;
1586 
1587 		page_cnt = srq->usrq.pbl_info.num_pbes;
1588 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1589 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1590 		page_size = PAGE_SIZE;
1591 	} else {
1592 		struct qed_chain *pbl;
1593 
1594 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1595 		if (rc)
1596 			goto err0;
1597 
1598 		pbl = &hw_srq->pbl;
1599 		page_cnt = qed_chain_get_page_cnt(pbl);
1600 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1601 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1602 		page_size = QED_CHAIN_PAGE_SIZE;
1603 	}
1604 
1605 	in_params.pd_id = pd->pd_id;
1606 	in_params.pbl_base_addr = pbl_base_addr;
1607 	in_params.prod_pair_addr = phy_prod_pair_addr;
1608 	in_params.num_pages = page_cnt;
1609 	in_params.page_size = page_size;
1610 	if (srq->is_xrc) {
1611 		struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd);
1612 		struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq);
1613 
1614 		in_params.is_xrc = 1;
1615 		in_params.xrcd_id = xrcd->xrcd_id;
1616 		in_params.cq_cid = cq->icid;
1617 	}
1618 
1619 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1620 	if (rc)
1621 		goto err1;
1622 
1623 	srq->srq_id = out_params.srq_id;
1624 
1625 	if (udata) {
1626 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1627 		if (rc)
1628 			goto err2;
1629 	}
1630 
1631 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1632 	if (rc)
1633 		goto err2;
1634 
1635 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1636 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1637 	return 0;
1638 
1639 err2:
1640 	destroy_in_params.srq_id = srq->srq_id;
1641 
1642 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1643 err1:
1644 	if (udata)
1645 		qedr_free_srq_user_params(srq);
1646 	else
1647 		qedr_free_srq_kernel_params(srq);
1648 err0:
1649 	return -EFAULT;
1650 }
1651 
1652 int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1653 {
1654 	struct qed_rdma_destroy_srq_in_params in_params = {};
1655 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1656 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1657 
1658 	xa_erase_irq(&dev->srqs, srq->srq_id);
1659 	in_params.srq_id = srq->srq_id;
1660 	in_params.is_xrc = srq->is_xrc;
1661 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1662 
1663 	if (ibsrq->uobject)
1664 		qedr_free_srq_user_params(srq);
1665 	else
1666 		qedr_free_srq_kernel_params(srq);
1667 
1668 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1669 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1670 		 srq->srq_id);
1671 	return 0;
1672 }
1673 
1674 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1675 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1676 {
1677 	struct qed_rdma_modify_srq_in_params in_params = {};
1678 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1679 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1680 	int rc;
1681 
1682 	if (attr_mask & IB_SRQ_MAX_WR) {
1683 		DP_ERR(dev,
1684 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1685 		       attr_mask, srq);
1686 		return -EINVAL;
1687 	}
1688 
1689 	if (attr_mask & IB_SRQ_LIMIT) {
1690 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1691 			DP_ERR(dev,
1692 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1693 			       attr->srq_limit, srq->hw_srq.max_wr);
1694 			return -EINVAL;
1695 		}
1696 
1697 		in_params.srq_id = srq->srq_id;
1698 		in_params.wqe_limit = attr->srq_limit;
1699 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1700 		if (rc)
1701 			return rc;
1702 	}
1703 
1704 	srq->srq_limit = attr->srq_limit;
1705 
1706 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1707 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1708 
1709 	return 0;
1710 }
1711 
1712 static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type)
1713 {
1714 	switch (ib_qp_type) {
1715 	case IB_QPT_RC:
1716 		return QED_RDMA_QP_TYPE_RC;
1717 	case IB_QPT_XRC_INI:
1718 		return QED_RDMA_QP_TYPE_XRC_INI;
1719 	case IB_QPT_XRC_TGT:
1720 		return QED_RDMA_QP_TYPE_XRC_TGT;
1721 	default:
1722 		return QED_RDMA_QP_TYPE_INVAL;
1723 	}
1724 }
1725 
1726 static inline void
1727 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1728 			      struct qedr_pd *pd,
1729 			      struct qedr_qp *qp,
1730 			      struct ib_qp_init_attr *attrs,
1731 			      bool fmr_and_reserved_lkey,
1732 			      struct qed_rdma_create_qp_in_params *params)
1733 {
1734 	/* QP handle to be written in an async event */
1735 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1736 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1737 
1738 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1739 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1740 	params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type);
1741 	params->stats_queue = 0;
1742 
1743 	if (pd) {
1744 		params->pd = pd->pd_id;
1745 		params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1746 	}
1747 
1748 	if (qedr_qp_has_sq(qp))
1749 		params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1750 
1751 	if (qedr_qp_has_rq(qp))
1752 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1753 
1754 	if (qedr_qp_has_srq(qp)) {
1755 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1756 		params->srq_id = qp->srq->srq_id;
1757 		params->use_srq = true;
1758 	} else {
1759 		params->srq_id = 0;
1760 		params->use_srq = false;
1761 	}
1762 }
1763 
1764 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1765 {
1766 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1767 		 "qp=%p. "
1768 		 "sq_addr=0x%llx, "
1769 		 "sq_len=%zd, "
1770 		 "rq_addr=0x%llx, "
1771 		 "rq_len=%zd"
1772 		 "\n",
1773 		 qp,
1774 		 qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0,
1775 		 qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0,
1776 		 qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0,
1777 		 qedr_qp_has_rq(qp) ? qp->urq.buf_len : 0);
1778 }
1779 
1780 static inline void
1781 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1782 			    struct qedr_qp *qp,
1783 			    struct qed_rdma_create_qp_out_params *out_params)
1784 {
1785 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1786 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1787 
1788 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1789 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1790 	if (!qp->srq) {
1791 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1792 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1793 	}
1794 
1795 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1796 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1797 }
1798 
1799 static void qedr_cleanup_user(struct qedr_dev *dev,
1800 			      struct qedr_ucontext *ctx,
1801 			      struct qedr_qp *qp)
1802 {
1803 	if (qedr_qp_has_sq(qp)) {
1804 		ib_umem_release(qp->usq.umem);
1805 		qp->usq.umem = NULL;
1806 	}
1807 
1808 	if (qedr_qp_has_rq(qp)) {
1809 		ib_umem_release(qp->urq.umem);
1810 		qp->urq.umem = NULL;
1811 	}
1812 
1813 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1814 		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1815 		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1816 	} else {
1817 		kfree(qp->usq.pbl_tbl);
1818 		kfree(qp->urq.pbl_tbl);
1819 	}
1820 
1821 	if (qp->usq.db_rec_data) {
1822 		qedr_db_recovery_del(dev, qp->usq.db_addr,
1823 				     &qp->usq.db_rec_data->db_data);
1824 		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1825 	}
1826 
1827 	if (qp->urq.db_rec_data) {
1828 		qedr_db_recovery_del(dev, qp->urq.db_addr,
1829 				     &qp->urq.db_rec_data->db_data);
1830 		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
1831 	}
1832 
1833 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1834 		qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
1835 				     &qp->urq.db_rec_db2_data);
1836 }
1837 
1838 static int qedr_create_user_qp(struct qedr_dev *dev,
1839 			       struct qedr_qp *qp,
1840 			       struct ib_pd *ibpd,
1841 			       struct ib_udata *udata,
1842 			       struct ib_qp_init_attr *attrs)
1843 {
1844 	struct qed_rdma_create_qp_in_params in_params;
1845 	struct qed_rdma_create_qp_out_params out_params;
1846 	struct qedr_create_qp_uresp uresp = {};
1847 	struct qedr_create_qp_ureq ureq = {};
1848 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1849 	struct qedr_ucontext *ctx = NULL;
1850 	struct qedr_pd *pd = NULL;
1851 	int rc = 0;
1852 
1853 	qp->create_type = QEDR_QP_CREATE_USER;
1854 
1855 	if (ibpd) {
1856 		pd = get_qedr_pd(ibpd);
1857 		ctx = pd->uctx;
1858 	}
1859 
1860 	if (udata) {
1861 		rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1862 					udata->inlen));
1863 		if (rc) {
1864 			DP_ERR(dev, "Problem copying data from user space\n");
1865 			return rc;
1866 		}
1867 	}
1868 
1869 	if (qedr_qp_has_sq(qp)) {
1870 		/* SQ - read access only (0) */
1871 		rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1872 					  ureq.sq_len, true, 0, alloc_and_init);
1873 		if (rc)
1874 			return rc;
1875 	}
1876 
1877 	if (qedr_qp_has_rq(qp)) {
1878 		/* RQ - read access only (0) */
1879 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1880 					  ureq.rq_len, true, 0, alloc_and_init);
1881 		if (rc)
1882 			return rc;
1883 	}
1884 
1885 	memset(&in_params, 0, sizeof(in_params));
1886 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1887 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1888 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1889 
1890 	if (qp->qp_type == IB_QPT_XRC_TGT) {
1891 		struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd);
1892 
1893 		in_params.xrcd_id = xrcd->xrcd_id;
1894 		in_params.qp_handle_lo = qp->qp_id;
1895 		in_params.use_srq = 1;
1896 	}
1897 
1898 	if (qedr_qp_has_sq(qp)) {
1899 		in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1900 		in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1901 	}
1902 
1903 	if (qedr_qp_has_rq(qp)) {
1904 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1905 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1906 	}
1907 
1908 	if (ctx)
1909 		SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
1910 
1911 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1912 					      &in_params, &out_params);
1913 
1914 	if (!qp->qed_qp) {
1915 		rc = -ENOMEM;
1916 		goto err1;
1917 	}
1918 
1919 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1920 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1921 
1922 	qp->qp_id = out_params.qp_id;
1923 	qp->icid = out_params.icid;
1924 
1925 	if (udata) {
1926 		rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1927 		if (rc)
1928 			goto err;
1929 	}
1930 
1931 	/* db offset was calculated in copy_qp_uresp, now set in the user q */
1932 	if (qedr_qp_has_sq(qp)) {
1933 		qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1934 		rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1935 					  &qp->usq.db_rec_data->db_data,
1936 					  DB_REC_WIDTH_32B,
1937 					  DB_REC_USER);
1938 		if (rc)
1939 			goto err;
1940 	}
1941 
1942 	if (qedr_qp_has_rq(qp)) {
1943 		qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1944 		rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1945 					  &qp->urq.db_rec_data->db_data,
1946 					  DB_REC_WIDTH_32B,
1947 					  DB_REC_USER);
1948 		if (rc)
1949 			goto err;
1950 	}
1951 
1952 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1953 		qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
1954 
1955 		/* Calculate the db_rec_db2 data here since it is constant,
1956 		 * so there is no need to reflect it from user space.
1957 		 */
1958 		qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
1959 		qp->urq.db_rec_db2_data.data.value =
1960 			cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
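		/* This mirrors the kernel-QP doorbell setup in
		 * qedr_set_iwarp_db_info() below, which programs the same
		 * DQ_TCM_IWARP_POST_RQ_CF_CMD value for the second RQ doorbell.
		 */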
1961 
1962 		rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1963 					  &qp->urq.db_rec_db2_data,
1964 					  DB_REC_WIDTH_32B,
1965 					  DB_REC_USER);
1966 		if (rc)
1967 			goto err;
1968 	}
1969 	qedr_qp_user_print(dev, qp);
1970 	return rc;
1971 err:
1972 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1973 	if (rc)
1974 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1975 
1976 err1:
1977 	qedr_cleanup_user(dev, ctx, qp);
1978 	return rc;
1979 }
1980 
1981 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1982 {
1983 	int rc;
1984 
1985 	qp->sq.db = dev->db_addr +
1986 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1987 	qp->sq.db_data.data.icid = qp->icid;
1988 
1989 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1990 				  &qp->sq.db_data,
1991 				  DB_REC_WIDTH_32B,
1992 				  DB_REC_KERNEL);
1993 	if (rc)
1994 		return rc;
1995 
1996 	qp->rq.db = dev->db_addr +
1997 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1998 	qp->rq.db_data.data.icid = qp->icid;
1999 	qp->rq.iwarp_db2 = dev->db_addr +
2000 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
2001 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
2002 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
2003 
2004 	rc = qedr_db_recovery_add(dev, qp->rq.db,
2005 				  &qp->rq.db_data,
2006 				  DB_REC_WIDTH_32B,
2007 				  DB_REC_KERNEL);
2008 	if (rc)
2009 		return rc;
2010 
2011 	rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
2012 				  &qp->rq.iwarp_db2_data,
2013 				  DB_REC_WIDTH_32B,
2014 				  DB_REC_KERNEL);
2015 	return rc;
2016 }
2017 
2018 static int
2019 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
2020 			   struct qedr_qp *qp,
2021 			   struct qed_rdma_create_qp_in_params *in_params,
2022 			   u32 n_sq_elems, u32 n_rq_elems)
2023 {
2024 	struct qed_rdma_create_qp_out_params out_params;
2025 	struct qed_chain_init_params params = {
2026 		.mode		= QED_CHAIN_MODE_PBL,
2027 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
2028 	};
2029 	int rc;
2030 
2031 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
2032 	params.num_elems = n_sq_elems;
2033 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
2034 
2035 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
2036 	if (rc)
2037 		return rc;
2038 
2039 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
2040 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
2041 
2042 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
2043 	params.num_elems = n_rq_elems;
2044 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
2045 
2046 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2047 	if (rc)
2048 		return rc;
2049 
2050 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
2051 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
2052 
2053 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
2054 					      in_params, &out_params);
2055 
2056 	if (!qp->qed_qp)
2057 		return -EINVAL;
2058 
2059 	qp->qp_id = out_params.qp_id;
2060 	qp->icid = out_params.icid;
2061 
2062 	return qedr_set_roce_db_info(dev, qp);
2063 }
2064 
2065 static int
2066 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
2067 			    struct qedr_qp *qp,
2068 			    struct qed_rdma_create_qp_in_params *in_params,
2069 			    u32 n_sq_elems, u32 n_rq_elems)
2070 {
2071 	struct qed_rdma_create_qp_out_params out_params;
2072 	struct qed_chain_init_params params = {
2073 		.mode		= QED_CHAIN_MODE_PBL,
2074 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
2075 	};
2076 	int rc;
2077 
2078 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
2079 						     QEDR_SQE_ELEMENT_SIZE,
2080 						     QED_CHAIN_PAGE_SIZE,
2081 						     QED_CHAIN_MODE_PBL);
2082 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
2083 						     QEDR_RQE_ELEMENT_SIZE,
2084 						     QED_CHAIN_PAGE_SIZE,
2085 						     QED_CHAIN_MODE_PBL);
2086 
2087 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
2088 					      in_params, &out_params);
2089 
2090 	if (!qp->qed_qp)
2091 		return -EINVAL;
2092 
2093 	/* Now we allocate the chain */
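	/* Unlike the RoCE flow in qedr_roce_create_kernel_qp() above, the QP is
	 * created first and the SQ/RQ chains are then built on the PBL memory
	 * returned by qed (ext_pbl_virt/ext_pbl_phys) instead of being
	 * allocated before the rdma_create_qp() call.
	 */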
2094 
2095 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
2096 	params.num_elems = n_sq_elems;
2097 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
2098 	params.ext_pbl_virt = out_params.sq_pbl_virt;
2099 	params.ext_pbl_phys = out_params.sq_pbl_phys;
2100 
2101 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
2102 	if (rc)
2103 		goto err;
2104 
2105 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
2106 	params.num_elems = n_rq_elems;
2107 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
2108 	params.ext_pbl_virt = out_params.rq_pbl_virt;
2109 	params.ext_pbl_phys = out_params.rq_pbl_phys;
2110 
2111 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2112 	if (rc)
2113 		goto err;
2114 
2115 	qp->qp_id = out_params.qp_id;
2116 	qp->icid = out_params.icid;
2117 
2118 	return qedr_set_iwarp_db_info(dev, qp);
2119 
2120 err:
2121 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2122 
2123 	return rc;
2124 }
2125 
2126 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2127 {
2128 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2129 	kfree(qp->wqe_wr_id);
2130 
2131 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2132 	kfree(qp->rqe_wr_id);
2133 
2134 	/* GSI qp is not registered to db mechanism so no need to delete */
2135 	if (qp->qp_type == IB_QPT_GSI)
2136 		return;
2137 
2138 	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2139 
2140 	if (!qp->srq) {
2141 		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2142 
2143 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2144 			qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2145 					     &qp->rq.iwarp_db2_data);
2146 	}
2147 }
2148 
2149 static int qedr_create_kernel_qp(struct qedr_dev *dev,
2150 				 struct qedr_qp *qp,
2151 				 struct ib_pd *ibpd,
2152 				 struct ib_qp_init_attr *attrs)
2153 {
2154 	struct qed_rdma_create_qp_in_params in_params;
2155 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2156 	int rc = -EINVAL;
2157 	u32 n_rq_elems;
2158 	u32 n_sq_elems;
2159 	u32 n_sq_entries;
2160 
2161 	memset(&in_params, 0, sizeof(in_params));
2162 	qp->create_type = QEDR_QP_CREATE_KERNEL;
2163 
2164 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2165 	 * the ring. The ring should allow at least a single WR, even if the
2166 	 * user requested none, due to allocation issues.
2167 	 * We should add an extra WR since the prod and cons indices of
2168 	 * wqe_wr_id are managed in such a way that the WQ is considered full
2169 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
2170 	 * double the number of entries due to an iSER issue that pushes far more
2171 	 * WRs than indicated. If we decline its ib_post_send() then we get
2172 	 * error prints in dmesg that we'd like to avoid.
2173 	 */
2174 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2175 			      dev->attr.max_sqe);
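	/* For example (illustrative numbers only): with max_send_wr = 128 and
	 * wq_multiplier = 2, the shadow array and SQ are sized for 256 WRs,
	 * capped by dev->attr.max_sqe.
	 */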
2176 
2177 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2178 				GFP_KERNEL);
2179 	if (!qp->wqe_wr_id) {
2180 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2181 		return -ENOMEM;
2182 	}
2183 
2184 	/* QP handle to be written in CQE */
2185 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2186 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2187 
2188 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2189 	 * the ring. The ring should allow at least a single WR, even if the
2190 	 * user requested none, due to allocation issues.
2191 	 */
2192 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
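	/* E.g. a request of max_recv_wr = 0 still yields rq.max_wr = 1. */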
2193 
2194 	/* Allocate driver internal RQ array */
2195 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2196 				GFP_KERNEL);
2197 	if (!qp->rqe_wr_id) {
2198 		DP_ERR(dev,
2199 		       "create qp: failed RQ shadow memory allocation\n");
2200 		kfree(qp->wqe_wr_id);
2201 		return -ENOMEM;
2202 	}
2203 
2204 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2205 
2206 	n_sq_entries = attrs->cap.max_send_wr;
2207 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2208 	n_sq_entries = max_t(u32, n_sq_entries, 1);
2209 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2210 
2211 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2212 
2213 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2214 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2215 						 n_sq_elems, n_rq_elems);
2216 	else
2217 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2218 						n_sq_elems, n_rq_elems);
2219 	if (rc)
2220 		qedr_cleanup_kernel(dev, qp);
2221 
2222 	return rc;
2223 }
2224 
2225 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2226 				  struct ib_udata *udata)
2227 {
2228 	struct qedr_ucontext *ctx =
2229 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2230 					  ibucontext);
2231 	int rc;
2232 
2233 	if (qp->qp_type != IB_QPT_GSI) {
2234 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2235 		if (rc)
2236 			return rc;
2237 	}
2238 
2239 	if (qp->create_type == QEDR_QP_CREATE_USER)
2240 		qedr_cleanup_user(dev, ctx, qp);
2241 	else
2242 		qedr_cleanup_kernel(dev, qp);
2243 
2244 	return 0;
2245 }
2246 
2247 int qedr_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
2248 		   struct ib_udata *udata)
2249 {
2250 	struct qedr_xrcd *xrcd = NULL;
2251 	struct ib_pd *ibpd = ibqp->pd;
2252 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2253 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2254 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2255 	int rc = 0;
2256 
2257 	if (attrs->create_flags)
2258 		return -EOPNOTSUPP;
2259 
2260 	if (attrs->qp_type == IB_QPT_XRC_TGT)
2261 		xrcd = get_qedr_xrcd(attrs->xrcd);
2262 	else
2263 		pd = get_qedr_pd(ibpd);
2264 
2265 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2266 		 udata ? "user library" : "kernel", pd);
2267 
2268 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2269 	if (rc)
2270 		return rc;
2271 
2272 	DP_DEBUG(dev, QEDR_MSG_QP,
2273 		 "create qp: called from %s, event_handler=%p, pd=%p, sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2274 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2275 		 get_qedr_cq(attrs->send_cq),
2276 		 get_qedr_cq(attrs->send_cq)->icid,
2277 		 get_qedr_cq(attrs->recv_cq),
2278 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2279 
2280 	qedr_set_common_qp_params(dev, qp, pd, attrs);
2281 
2282 	if (attrs->qp_type == IB_QPT_GSI)
2283 		return qedr_create_gsi_qp(dev, attrs, qp);
2284 
2285 	if (udata || xrcd)
2286 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2287 	else
2288 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2289 
2290 	if (rc)
2291 		return rc;
2292 
2293 	qp->ibqp.qp_num = qp->qp_id;
2294 
2295 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2296 		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2297 		if (rc)
2298 			goto out_free_qp_resources;
2299 	}
2300 
2301 	return 0;
2302 
2303 out_free_qp_resources:
2304 	qedr_free_qp_resources(dev, qp, udata);
2305 	return -EFAULT;
2306 }
2307 
2308 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2309 {
2310 	switch (qp_state) {
2311 	case QED_ROCE_QP_STATE_RESET:
2312 		return IB_QPS_RESET;
2313 	case QED_ROCE_QP_STATE_INIT:
2314 		return IB_QPS_INIT;
2315 	case QED_ROCE_QP_STATE_RTR:
2316 		return IB_QPS_RTR;
2317 	case QED_ROCE_QP_STATE_RTS:
2318 		return IB_QPS_RTS;
2319 	case QED_ROCE_QP_STATE_SQD:
2320 		return IB_QPS_SQD;
2321 	case QED_ROCE_QP_STATE_ERR:
2322 		return IB_QPS_ERR;
2323 	case QED_ROCE_QP_STATE_SQE:
2324 		return IB_QPS_SQE;
2325 	}
2326 	return IB_QPS_ERR;
2327 }
2328 
2329 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2330 					enum ib_qp_state qp_state)
2331 {
2332 	switch (qp_state) {
2333 	case IB_QPS_RESET:
2334 		return QED_ROCE_QP_STATE_RESET;
2335 	case IB_QPS_INIT:
2336 		return QED_ROCE_QP_STATE_INIT;
2337 	case IB_QPS_RTR:
2338 		return QED_ROCE_QP_STATE_RTR;
2339 	case IB_QPS_RTS:
2340 		return QED_ROCE_QP_STATE_RTS;
2341 	case IB_QPS_SQD:
2342 		return QED_ROCE_QP_STATE_SQD;
2343 	case IB_QPS_ERR:
2344 		return QED_ROCE_QP_STATE_ERR;
2345 	default:
2346 		return QED_ROCE_QP_STATE_ERR;
2347 	}
2348 }
2349 
2350 static int qedr_update_qp_state(struct qedr_dev *dev,
2351 				struct qedr_qp *qp,
2352 				enum qed_roce_qp_state cur_state,
2353 				enum qed_roce_qp_state new_state)
2354 {
2355 	int status = 0;
2356 
2357 	if (new_state == cur_state)
2358 		return 0;
2359 
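	/* Transitions accepted below: RESET->INIT, INIT->{RTR, ERR},
	 * RTR->{RTS, ERR}, RTS->{SQD, ERR}, SQD->{RTS, ERR} and ERR->RESET
	 * (the latter only when both queues are already empty).
	 */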
2360 	switch (cur_state) {
2361 	case QED_ROCE_QP_STATE_RESET:
2362 		switch (new_state) {
2363 		case QED_ROCE_QP_STATE_INIT:
2364 			break;
2365 		default:
2366 			status = -EINVAL;
2367 			break;
2368 		}
2369 		break;
2370 	case QED_ROCE_QP_STATE_INIT:
2371 		switch (new_state) {
2372 		case QED_ROCE_QP_STATE_RTR:
2373 			/* Update doorbell (in case post_recv was
2374 			 * done before move to RTR)
2375 			 */
2376 
2377 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2378 				writel(qp->rq.db_data.raw, qp->rq.db);
2379 			}
2380 			break;
2381 		case QED_ROCE_QP_STATE_ERR:
2382 			break;
2383 		default:
2384 			/* Invalid state change. */
2385 			status = -EINVAL;
2386 			break;
2387 		}
2388 		break;
2389 	case QED_ROCE_QP_STATE_RTR:
2390 		/* RTR->XXX */
2391 		switch (new_state) {
2392 		case QED_ROCE_QP_STATE_RTS:
2393 			break;
2394 		case QED_ROCE_QP_STATE_ERR:
2395 			break;
2396 		default:
2397 			/* Invalid state change. */
2398 			status = -EINVAL;
2399 			break;
2400 		}
2401 		break;
2402 	case QED_ROCE_QP_STATE_RTS:
2403 		/* RTS->XXX */
2404 		switch (new_state) {
2405 		case QED_ROCE_QP_STATE_SQD:
2406 			break;
2407 		case QED_ROCE_QP_STATE_ERR:
2408 			break;
2409 		default:
2410 			/* Invalid state change. */
2411 			status = -EINVAL;
2412 			break;
2413 		}
2414 		break;
2415 	case QED_ROCE_QP_STATE_SQD:
2416 		/* SQD->XXX */
2417 		switch (new_state) {
2418 		case QED_ROCE_QP_STATE_RTS:
2419 		case QED_ROCE_QP_STATE_ERR:
2420 			break;
2421 		default:
2422 			/* Invalid state change. */
2423 			status = -EINVAL;
2424 			break;
2425 		}
2426 		break;
2427 	case QED_ROCE_QP_STATE_ERR:
2428 		/* ERR->XXX */
2429 		switch (new_state) {
2430 		case QED_ROCE_QP_STATE_RESET:
2431 			if ((qp->rq.prod != qp->rq.cons) ||
2432 			    (qp->sq.prod != qp->sq.cons)) {
2433 				DP_NOTICE(dev,
2434 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2435 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2436 					  qp->sq.cons);
2437 				status = -EINVAL;
2438 			}
2439 			break;
2440 		default:
2441 			status = -EINVAL;
2442 			break;
2443 		}
2444 		break;
2445 	default:
2446 		status = -EINVAL;
2447 		break;
2448 	}
2449 
2450 	return status;
2451 }
2452 
2453 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2454 		   int attr_mask, struct ib_udata *udata)
2455 {
2456 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2457 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2458 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2459 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2460 	enum ib_qp_state old_qp_state, new_qp_state;
2461 	enum qed_roce_qp_state cur_state;
2462 	int rc = 0;
2463 
2464 	DP_DEBUG(dev, QEDR_MSG_QP,
2465 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2466 		 attr->qp_state);
2467 
2468 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
2469 		return -EOPNOTSUPP;
2470 
2471 	old_qp_state = qedr_get_ibqp_state(qp->state);
2472 	if (attr_mask & IB_QP_STATE)
2473 		new_qp_state = attr->qp_state;
2474 	else
2475 		new_qp_state = old_qp_state;
2476 
2477 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2478 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2479 					ibqp->qp_type, attr_mask)) {
2480 			DP_ERR(dev,
2481 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2482 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2483 			       attr_mask, qp->qp_id, ibqp->qp_type,
2484 			       old_qp_state, new_qp_state);
2485 			rc = -EINVAL;
2486 			goto err;
2487 		}
2488 	}
2489 
2490 	/* Translate the masks... */
2491 	if (attr_mask & IB_QP_STATE) {
2492 		SET_FIELD(qp_params.modify_flags,
2493 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2494 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2495 	}
2496 
2497 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2498 		qp_params.sqd_async = true;
2499 
2500 	if (attr_mask & IB_QP_PKEY_INDEX) {
2501 		SET_FIELD(qp_params.modify_flags,
2502 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2503 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2504 			rc = -EINVAL;
2505 			goto err;
2506 		}
2507 
2508 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2509 	}
2510 
2511 	if (attr_mask & IB_QP_QKEY)
2512 		qp->qkey = attr->qkey;
2513 
2514 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2515 		SET_FIELD(qp_params.modify_flags,
2516 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2517 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2518 						  IB_ACCESS_REMOTE_READ;
2519 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2520 						   IB_ACCESS_REMOTE_WRITE;
2521 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2522 					       IB_ACCESS_REMOTE_ATOMIC;
2523 	}
2524 
2525 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2526 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2527 			return -EINVAL;
2528 
2529 		if (attr_mask & IB_QP_PATH_MTU) {
2530 			if (attr->path_mtu < IB_MTU_256 ||
2531 			    attr->path_mtu > IB_MTU_4096) {
2532 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2533 				rc = -EINVAL;
2534 				goto err;
2535 			}
2536 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2537 				      ib_mtu_enum_to_int(iboe_get_mtu
2538 							 (dev->ndev->mtu)));
2539 		}
2540 
2541 		if (!qp->mtu) {
2542 			qp->mtu =
2543 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2544 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2545 		}
2546 
2547 		SET_FIELD(qp_params.modify_flags,
2548 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2549 
2550 		qp_params.traffic_class_tos = grh->traffic_class;
2551 		qp_params.flow_label = grh->flow_label;
2552 		qp_params.hop_limit_ttl = grh->hop_limit;
2553 
2554 		qp->sgid_idx = grh->sgid_index;
2555 
2556 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2557 		if (rc) {
2558 			DP_ERR(dev,
2559 			       "modify qp: problems with GID index %d (rc=%d)\n",
2560 			       grh->sgid_index, rc);
2561 			return rc;
2562 		}
2563 
2564 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2565 				   qp_params.remote_mac_addr);
2566 		if (rc)
2567 			return rc;
2568 
2569 		qp_params.use_local_mac = true;
2570 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2571 
2572 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2573 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2574 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2575 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2576 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2577 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2578 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2579 			 qp_params.remote_mac_addr);
2580 
2581 		qp_params.mtu = qp->mtu;
2582 		qp_params.lb_indication = false;
2583 	}
2584 
2585 	if (!qp_params.mtu) {
2586 		/* Stay with current MTU */
2587 		if (qp->mtu)
2588 			qp_params.mtu = qp->mtu;
2589 		else
2590 			qp_params.mtu =
2591 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2592 	}
2593 
2594 	if (attr_mask & IB_QP_TIMEOUT) {
2595 		SET_FIELD(qp_params.modify_flags,
2596 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2597 
2598 		/* The received timeout value is an exponent used like this:
2599 		 *    "12.7.34 LOCAL ACK TIMEOUT
2600 		 *    Value representing the transport (ACK) timeout for use by
2601 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2602 		 * The FW expects timeout in msec so we need to divide the usec
2603 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2604 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2605 		 * The value of zero means infinite so we use a 'max_t' to make
2606 		 * sure that sub 1 msec values will be configured as 1 msec.
2607 		 */
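		/* Worked example (illustrative): attr->timeout = 14 means
		 * 4.096 * 2^14 usec ~= 67 msec; the approximation programs
		 * 1 << (14 - 8) = 64 msec.
		 */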
2608 		if (attr->timeout)
2609 			qp_params.ack_timeout =
2610 					1 << max_t(int, attr->timeout - 8, 0);
2611 		else
2612 			qp_params.ack_timeout = 0;
2613 	}
2614 
2615 	if (attr_mask & IB_QP_RETRY_CNT) {
2616 		SET_FIELD(qp_params.modify_flags,
2617 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2618 		qp_params.retry_cnt = attr->retry_cnt;
2619 	}
2620 
2621 	if (attr_mask & IB_QP_RNR_RETRY) {
2622 		SET_FIELD(qp_params.modify_flags,
2623 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2624 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2625 	}
2626 
2627 	if (attr_mask & IB_QP_RQ_PSN) {
2628 		SET_FIELD(qp_params.modify_flags,
2629 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2630 		qp_params.rq_psn = attr->rq_psn;
2631 		qp->rq_psn = attr->rq_psn;
2632 	}
2633 
2634 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2635 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2636 			rc = -EINVAL;
2637 			DP_ERR(dev,
2638 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2639 			       attr->max_rd_atomic,
2640 			       dev->attr.max_qp_req_rd_atomic_resc);
2641 			goto err;
2642 		}
2643 
2644 		SET_FIELD(qp_params.modify_flags,
2645 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2646 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2647 	}
2648 
2649 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2650 		SET_FIELD(qp_params.modify_flags,
2651 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2652 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2653 	}
2654 
2655 	if (attr_mask & IB_QP_SQ_PSN) {
2656 		SET_FIELD(qp_params.modify_flags,
2657 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2658 		qp_params.sq_psn = attr->sq_psn;
2659 		qp->sq_psn = attr->sq_psn;
2660 	}
2661 
2662 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2663 		if (attr->max_dest_rd_atomic >
2664 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2665 			DP_ERR(dev,
2666 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2667 			       attr->max_dest_rd_atomic,
2668 			       dev->attr.max_qp_resp_rd_atomic_resc);
2669 
2670 			rc = -EINVAL;
2671 			goto err;
2672 		}
2673 
2674 		SET_FIELD(qp_params.modify_flags,
2675 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2676 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2677 	}
2678 
2679 	if (attr_mask & IB_QP_DEST_QPN) {
2680 		SET_FIELD(qp_params.modify_flags,
2681 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2682 
2683 		qp_params.dest_qp = attr->dest_qp_num;
2684 		qp->dest_qp_num = attr->dest_qp_num;
2685 	}
2686 
2687 	cur_state = qp->state;
2688 
2689 	/* Update the QP state before the actual ramrod to prevent a race with
2690 	 * fast path. Modifying the QP state to error will cause the device to
2691 	 * flush the CQEs, and while polling, the flushed CQEs will be considered
2692 	 * a potential issue if the QP isn't in the error state.
2693 	 */
2694 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2695 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2696 		qp->state = QED_ROCE_QP_STATE_ERR;
2697 
2698 	if (qp->qp_type != IB_QPT_GSI)
2699 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2700 					      qp->qed_qp, &qp_params);
2701 
2702 	if (attr_mask & IB_QP_STATE) {
2703 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2704 			rc = qedr_update_qp_state(dev, qp, cur_state,
2705 						  qp_params.new_state);
2706 		qp->state = qp_params.new_state;
2707 	}
2708 
2709 err:
2710 	return rc;
2711 }
2712 
2713 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2714 {
2715 	int ib_qp_acc_flags = 0;
2716 
2717 	if (params->incoming_rdma_write_en)
2718 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2719 	if (params->incoming_rdma_read_en)
2720 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2721 	if (params->incoming_atomic_en)
2722 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2723 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2724 	return ib_qp_acc_flags;
2725 }
2726 
2727 int qedr_query_qp(struct ib_qp *ibqp,
2728 		  struct ib_qp_attr *qp_attr,
2729 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2730 {
2731 	struct qed_rdma_query_qp_out_params params;
2732 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2733 	struct qedr_dev *dev = qp->dev;
2734 	int rc = 0;
2735 
2736 	memset(&params, 0, sizeof(params));
2737 	memset(qp_attr, 0, sizeof(*qp_attr));
2738 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2739 
2740 	if (qp->qp_type != IB_QPT_GSI) {
2741 		rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2742 		if (rc)
2743 			goto err;
2744 		qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2745 	} else {
2746 		qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS);
2747 	}
2748 
2749 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2750 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2751 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2752 	qp_attr->rq_psn = params.rq_psn;
2753 	qp_attr->sq_psn = params.sq_psn;
2754 	qp_attr->dest_qp_num = params.dest_qp;
2755 
2756 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2757 
2758 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2759 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2760 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2761 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2762 	qp_attr->cap.max_inline_data = dev->attr.max_inline;
2763 	qp_init_attr->cap = qp_attr->cap;
2764 
2765 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2766 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2767 			params.flow_label, qp->sgid_idx,
2768 			params.hop_limit_ttl, params.traffic_class_tos);
2769 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2770 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2771 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2772 	qp_attr->timeout = params.timeout;
2773 	qp_attr->rnr_retry = params.rnr_retry;
2774 	qp_attr->retry_cnt = params.retry_cnt;
2775 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2776 	qp_attr->pkey_index = params.pkey_index;
2777 	qp_attr->port_num = 1;
2778 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2779 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2780 	qp_attr->alt_pkey_index = 0;
2781 	qp_attr->alt_port_num = 0;
2782 	qp_attr->alt_timeout = 0;
2783 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2784 
2785 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2786 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2787 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2788 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2789 
2790 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2791 		 qp_attr->cap.max_inline_data);
2792 
2793 err:
2794 	return rc;
2795 }
2796 
2797 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2798 {
2799 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2800 	struct qedr_dev *dev = qp->dev;
2801 	struct ib_qp_attr attr;
2802 	int attr_mask = 0;
2803 
2804 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2805 		 qp, qp->qp_type);
2806 
2807 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2808 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2809 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2810 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2811 
2812 			attr.qp_state = IB_QPS_ERR;
2813 			attr_mask |= IB_QP_STATE;
2814 
2815 			/* Change the QP state to ERROR */
2816 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2817 		}
2818 	} else {
2819 		/* If connection establishment has started, the WAIT_FOR_CONNECT
2820 		 * bit will be on and we need to wait for the establishment
2821 		 * to complete before destroying the qp.
2822 		 */
2823 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2824 				     &qp->iwarp_cm_flags))
2825 			wait_for_completion(&qp->iwarp_cm_comp);
2826 
2827 		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2828 		 * bit will be on, and we need to wait for the disconnect to
2829 		 * complete before continuing. We can use the same completion,
2830 		 * iwarp_cm_comp, since this is the only place that waits for
2831 		 * this completion and it is sequential. In addition,
2832 		 * disconnect can't occur before the connection is fully
2833 		 * established, therefore if WAIT_FOR_DISCONNECT is on it
2834 		 * means WAIT_FOR_CONNECT is also on and the completion for
2835 		 * CONNECT already occurred.
2836 		 */
2837 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2838 				     &qp->iwarp_cm_flags))
2839 			wait_for_completion(&qp->iwarp_cm_comp);
2840 	}
2841 
2842 	if (qp->qp_type == IB_QPT_GSI)
2843 		qedr_destroy_gsi_qp(dev);
2844 
2845 	/* We need to remove the entry from the xarray before we release the
2846 	 * qp_id to avoid a race of the qp_id being reallocated and failing
2847 	 * on xa_insert
2848 	 */
2849 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2850 		xa_erase(&dev->qps, qp->qp_id);
2851 
2852 	qedr_free_qp_resources(dev, qp, udata);
2853 
2854 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2855 		qedr_iw_qp_rem_ref(&qp->ibqp);
2856 		wait_for_completion(&qp->qp_rel_comp);
2857 	}
2858 
2859 	return 0;
2860 }
2861 
2862 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2863 		   struct ib_udata *udata)
2864 {
2865 	struct qedr_ah *ah = get_qedr_ah(ibah);
2866 
2867 	rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
2868 
2869 	return 0;
2870 }
2871 
2872 int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2873 {
2874 	struct qedr_ah *ah = get_qedr_ah(ibah);
2875 
2876 	rdma_destroy_ah_attr(&ah->attr);
2877 	return 0;
2878 }
2879 
2880 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2881 {
2882 	struct qedr_pbl *pbl, *tmp;
2883 
2884 	if (info->pbl_table)
2885 		list_add_tail(&info->pbl_table->list_entry,
2886 			      &info->free_pbl_list);
2887 
2888 	if (!list_empty(&info->inuse_pbl_list))
2889 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2890 
2891 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2892 		list_del(&pbl->list_entry);
2893 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2894 	}
2895 }
2896 
2897 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2898 			size_t page_list_len, bool two_layered)
2899 {
2900 	struct qedr_pbl *tmp;
2901 	int rc;
2902 
2903 	INIT_LIST_HEAD(&info->free_pbl_list);
2904 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2905 
2906 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2907 				  page_list_len, two_layered);
2908 	if (rc)
2909 		goto done;
2910 
2911 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2912 	if (IS_ERR(info->pbl_table)) {
2913 		rc = PTR_ERR(info->pbl_table);
2914 		goto done;
2915 	}
2916 
2917 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2918 		 &info->pbl_table->pa);
2919 
2920 	/* In the usual case we use 2 PBLs, so we add one to the free
2921 	 * list and allocate another one.
2922 	 */
2923 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2924 	if (IS_ERR(tmp)) {
2925 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2926 		goto done;
2927 	}
2928 
2929 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2930 
2931 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2932 
2933 done:
2934 	if (rc)
2935 		free_mr_info(dev, info);
2936 
2937 	return rc;
2938 }
2939 
2940 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2941 			       u64 usr_addr, int acc, struct ib_udata *udata)
2942 {
2943 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2944 	struct qedr_mr *mr;
2945 	struct qedr_pd *pd;
2946 	int rc = -ENOMEM;
2947 
2948 	pd = get_qedr_pd(ibpd);
2949 	DP_DEBUG(dev, QEDR_MSG_MR,
2950 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2951 		 pd->pd_id, start, len, usr_addr, acc);
2952 
2953 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2954 		return ERR_PTR(-EINVAL);
2955 
2956 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2957 	if (!mr)
2958 		return ERR_PTR(rc);
2959 
2960 	mr->type = QEDR_MR_USER;
2961 
2962 	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2963 	if (IS_ERR(mr->umem)) {
2964 		rc = -EFAULT;
2965 		goto err0;
2966 	}
2967 
2968 	rc = init_mr_info(dev, &mr->info,
2969 			  ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1);
2970 	if (rc)
2971 		goto err1;
2972 
2973 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2974 			   &mr->info.pbl_info, PAGE_SHIFT);
2975 
2976 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2977 	if (rc) {
2978 		if (rc == -EINVAL)
2979 			DP_ERR(dev, "Out of MR resources\n");
2980 		else
2981 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
2982 
2983 		goto err1;
2984 	}
2985 
2986 	/* Index only, 18 bit long, lkey = itid << 8 | key */
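	/* E.g. (illustrative values) itid = 0x12 and key = 0 give lkey 0x1200. */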
2987 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2988 	mr->hw_mr.key = 0;
2989 	mr->hw_mr.pd = pd->pd_id;
2990 	mr->hw_mr.local_read = 1;
2991 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2992 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2993 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2994 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2995 	mr->hw_mr.mw_bind = false;
2996 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2997 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2998 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2999 	mr->hw_mr.page_size_log = PAGE_SHIFT;
3000 	mr->hw_mr.length = len;
3001 	mr->hw_mr.vaddr = usr_addr;
3002 	mr->hw_mr.phy_mr = false;
3003 	mr->hw_mr.dma_mr = false;
3004 
3005 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3006 	if (rc) {
3007 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3008 		goto err2;
3009 	}
3010 
3011 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3012 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3013 	    mr->hw_mr.remote_atomic)
3014 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3015 
3016 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
3017 		 mr->ibmr.lkey);
3018 	return &mr->ibmr;
3019 
3020 err2:
3021 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3022 err1:
3023 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
3024 err0:
3025 	kfree(mr);
3026 	return ERR_PTR(rc);
3027 }
3028 
3029 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
3030 {
3031 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
3032 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
3033 	int rc = 0;
3034 
3035 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
3036 	if (rc)
3037 		return rc;
3038 
3039 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3040 
3041 	if (mr->type != QEDR_MR_DMA)
3042 		free_mr_info(dev, &mr->info);
3043 
3044 	/* it could be user registered memory. */
3045 	ib_umem_release(mr->umem);
3046 
3047 	kfree(mr);
3048 
3049 	return rc;
3050 }
3051 
3052 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
3053 				       int max_page_list_len)
3054 {
3055 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3056 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3057 	struct qedr_mr *mr;
3058 	int rc = -ENOMEM;
3059 
3060 	DP_DEBUG(dev, QEDR_MSG_MR,
3061 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
3062 		 max_page_list_len);
3063 
3064 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3065 	if (!mr)
3066 		return ERR_PTR(rc);
3067 
3068 	mr->dev = dev;
3069 	mr->type = QEDR_MR_FRMR;
3070 
3071 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
3072 	if (rc)
3073 		goto err0;
3074 
3075 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3076 	if (rc) {
3077 		if (rc == -EINVAL)
3078 			DP_ERR(dev, "Out of MR resources\n");
3079 		else
3080 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
3081 
3082 		goto err0;
3083 	}
3084 
3085 	/* Index only, 18 bit long, lkey = itid << 8 | key */
3086 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
3087 	mr->hw_mr.key = 0;
3088 	mr->hw_mr.pd = pd->pd_id;
3089 	mr->hw_mr.local_read = 1;
3090 	mr->hw_mr.local_write = 0;
3091 	mr->hw_mr.remote_read = 0;
3092 	mr->hw_mr.remote_write = 0;
3093 	mr->hw_mr.remote_atomic = 0;
3094 	mr->hw_mr.mw_bind = false;
3095 	mr->hw_mr.pbl_ptr = 0;
3096 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
3097 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
3098 	mr->hw_mr.length = 0;
3099 	mr->hw_mr.vaddr = 0;
3100 	mr->hw_mr.phy_mr = true;
3101 	mr->hw_mr.dma_mr = false;
3102 
3103 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3104 	if (rc) {
3105 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3106 		goto err1;
3107 	}
3108 
3109 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3110 	mr->ibmr.rkey = mr->ibmr.lkey;
3111 
3112 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
3113 	return mr;
3114 
3115 err1:
3116 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3117 err0:
3118 	kfree(mr);
3119 	return ERR_PTR(rc);
3120 }
3121 
3122 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3123 			    u32 max_num_sg)
3124 {
3125 	struct qedr_mr *mr;
3126 
3127 	if (mr_type != IB_MR_TYPE_MEM_REG)
3128 		return ERR_PTR(-EINVAL);
3129 
3130 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
3131 
3132 	if (IS_ERR(mr))
3133 		return ERR_PTR(-EINVAL);
3134 
3135 	return &mr->ibmr;
3136 }
3137 
3138 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3139 {
3140 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3141 	struct qedr_pbl *pbl_table;
3142 	struct regpair *pbe;
3143 	u32 pbes_in_page;
3144 
3145 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3146 		DP_ERR(mr->dev, "qedr_set_page failed, npages=%d\n", mr->npages);
3147 		return -ENOMEM;
3148 	}
3149 
3150 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3151 		 mr->npages, addr);
3152 
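	/* Two-level indexing into the PBL: each PBL page holds
	 * pbl_size / sizeof(u64) entries, so npages / pbes_in_page selects the
	 * PBL page and npages % pbes_in_page selects the PBE within it.
	 */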
3153 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3154 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3155 	pbe = (struct regpair *)pbl_table->va;
3156 	pbe +=  mr->npages % pbes_in_page;
3157 	pbe->lo = cpu_to_le32((u32)addr);
3158 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3159 
3160 	mr->npages++;
3161 
3162 	return 0;
3163 }
3164 
3165 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3166 {
3167 	int work = info->completed - info->completed_handled - 1;
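	/* Move up to 'work' page lists from the in-use list back to the free
	 * list, relying on the assumption documented in the loop below.
	 */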
3168 
3169 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3170 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3171 		struct qedr_pbl *pbl;
3172 
3173 		/* Free all the page lists that can be freed
3174 		 * (all the ones that were invalidated), under the assumption
3175 		 * that if an FMR completed successfully, then any invalidate
3176 		 * operation issued before it has also completed.
3177 		 */
3178 		pbl = list_first_entry(&info->inuse_pbl_list,
3179 				       struct qedr_pbl, list_entry);
3180 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3181 		info->completed_handled++;
3182 	}
3183 }
3184 
3185 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3186 		   int sg_nents, unsigned int *sg_offset)
3187 {
3188 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3189 
3190 	mr->npages = 0;
3191 
3192 	handle_completed_mrs(mr->dev, &mr->info);
3193 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3194 }
3195 
3196 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3197 {
3198 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3199 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3200 	struct qedr_mr *mr;
3201 	int rc;
3202 
3203 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3204 	if (!mr)
3205 		return ERR_PTR(-ENOMEM);
3206 
3207 	mr->type = QEDR_MR_DMA;
3208 
3209 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3210 	if (rc) {
3211 		if (rc == -EINVAL)
3212 			DP_ERR(dev, "Out of MR resources\n");
3213 		else
3214 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
3215 
3216 		goto err1;
3217 	}
3218 
3219 	/* index only, 18 bit long, lkey = itid << 8 | key */
3220 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3221 	mr->hw_mr.pd = pd->pd_id;
3222 	mr->hw_mr.local_read = 1;
3223 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3224 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3225 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3226 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3227 	mr->hw_mr.dma_mr = true;
3228 
3229 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3230 	if (rc) {
3231 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3232 		goto err2;
3233 	}
3234 
3235 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3236 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3237 	    mr->hw_mr.remote_atomic)
3238 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3239 
3240 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3241 	return &mr->ibmr;
3242 
3243 err2:
3244 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3245 err1:
3246 	kfree(mr);
3247 	return ERR_PTR(rc);
3248 }
3249 
3250 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3251 {
3252 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3253 }
3254 
3255 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3256 {
3257 	int i, len = 0;
3258 
3259 	for (i = 0; i < num_sge; i++)
3260 		len += sg_list[i].length;
3261 
3262 	return len;
3263 }
3264 
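/* Note: swap_wqe_data64() byte-swaps each 64-bit word regardless of host
 * endianness (cpu_to_le64() followed by cpu_to_be64() always amounts to a
 * single swap), presumably to match the byte order the firmware expects for
 * inline data segments.
 */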
3265 static void swap_wqe_data64(u64 *p)
3266 {
3267 	int i;
3268 
3269 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3270 		*p = cpu_to_be64(cpu_to_le64(*p));
3271 }
3272 
3273 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3274 				       struct qedr_qp *qp, u8 *wqe_size,
3275 				       const struct ib_send_wr *wr,
3276 				       const struct ib_send_wr **bad_wr,
3277 				       u8 *bits, u8 bit)
3278 {
3279 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3280 	char *seg_prt, *wqe;
3281 	int i, seg_siz;
3282 
3283 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3284 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3285 		*bad_wr = wr;
3286 		return 0;
3287 	}
3288 
3289 	if (!data_size)
3290 		return data_size;
3291 
3292 	*bits |= bit;
3293 
3294 	seg_prt = NULL;
3295 	wqe = NULL;
3296 	seg_siz = 0;
3297 
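	/* The payload is packed into the SQ ring one element at a time: each
	 * new chain element starts a segment of
	 * sizeof(struct rdma_sq_common_wqe) bytes, SGE data is copied into the
	 * current segment until it fills up, and every completed segment is
	 * byte-swapped by swap_wqe_data64(); the final partial segment is
	 * swapped after the loop.
	 */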
3298 	/* Copy data inline */
3299 	for (i = 0; i < wr->num_sge; i++) {
3300 		u32 len = wr->sg_list[i].length;
3301 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3302 
3303 		while (len > 0) {
3304 			u32 cur;
3305 
3306 			/* New segment required */
3307 			if (!seg_siz) {
3308 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3309 				seg_prt = wqe;
3310 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3311 				(*wqe_size)++;
3312 			}
3313 
3314 			/* Calculate currently allowed length */
3315 			cur = min_t(u32, len, seg_siz);
3316 			memcpy(seg_prt, src, cur);
3317 
3318 			/* Update segment variables */
3319 			seg_prt += cur;
3320 			seg_siz -= cur;
3321 
3322 			/* Update sge variables */
3323 			src += cur;
3324 			len -= cur;
3325 
3326 			/* Swap fully-completed segments */
3327 			if (!seg_siz)
3328 				swap_wqe_data64((u64 *)wqe);
3329 		}
3330 	}
3331 
3332 	/* Swap the last, partially filled segment */
3333 	if (seg_siz)
3334 		swap_wqe_data64((u64 *)wqe);
3335 
3336 	return data_size;
3337 }
3338 
3339 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3340 	do {							\
3341 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3342 		(sge)->length = cpu_to_le32(vlength);		\
3343 		(sge)->flags = cpu_to_le32(vflags);		\
3344 	} while (0)
3345 
3346 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3347 	do {							\
3348 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3349 		(hdr)->num_sges = num_sge;			\
3350 	} while (0)
3351 
3352 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3353 	do {							\
3354 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3355 		(sge)->length = cpu_to_le32(vlength);		\
3356 		(sge)->l_key = cpu_to_le32(vlkey);		\
3357 	} while (0)
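/* These helpers are used later in this file by the receive and SRQ post
 * paths; they write the address, length and key/flags fields of a WQE SGE in
 * little-endian form.
 */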
3358 
3359 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3360 				const struct ib_send_wr *wr)
3361 {
3362 	u32 data_size = 0;
3363 	int i;
3364 
3365 	for (i = 0; i < wr->num_sge; i++) {
3366 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3367 
3368 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3369 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3370 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3371 		data_size += wr->sg_list[i].length;
3372 	}
3373 
3374 	if (wqe_size)
3375 		*wqe_size += wr->num_sge;
3376 
3377 	return data_size;
3378 }
3379 
3380 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3381 				     struct qedr_qp *qp,
3382 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3383 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3384 				     const struct ib_send_wr *wr,
3385 				     const struct ib_send_wr **bad_wr)
3386 {
3387 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3388 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3389 
3390 	if (wr->send_flags & IB_SEND_INLINE &&
3391 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3392 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3393 		u8 flags = 0;
3394 
3395 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3396 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3397 						   bad_wr, &rwqe->flags, flags);
3398 	}
3399 
3400 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3401 }
3402 
3403 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3404 				     struct qedr_qp *qp,
3405 				     struct rdma_sq_send_wqe_1st *swqe,
3406 				     struct rdma_sq_send_wqe_2st *swqe2,
3407 				     const struct ib_send_wr *wr,
3408 				     const struct ib_send_wr **bad_wr)
3409 {
3410 	memset(swqe2, 0, sizeof(*swqe2));
3411 	if (wr->send_flags & IB_SEND_INLINE) {
3412 		u8 flags = 0;
3413 
3414 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3415 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3416 						   bad_wr, &swqe->flags, flags);
3417 	}
3418 
3419 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3420 }
3421 
3422 static int qedr_prepare_reg(struct qedr_qp *qp,
3423 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3424 			    const struct ib_reg_wr *wr)
3425 {
3426 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3427 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3428 
3429 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3430 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3431 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3432 	fwqe1->l_key = wr->key;
3433 
3434 	fwqe2->access_ctrl = 0;
3435 
3436 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3437 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3438 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3439 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3440 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3441 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3442 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3443 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3444 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3445 	fwqe2->fmr_ctrl = 0;
3446 
3447 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3448 		   ilog2(mr->ibmr.page_size) - 12);
3449 
3450 	fwqe2->length_hi = 0;
3451 	fwqe2->length_lo = mr->ibmr.length;
3452 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3453 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3454 
3455 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3456 
3457 	return 0;
3458 }
3459 
3460 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3461 {
3462 	switch (opcode) {
3463 	case IB_WR_RDMA_WRITE:
3464 	case IB_WR_RDMA_WRITE_WITH_IMM:
3465 		return IB_WC_RDMA_WRITE;
3466 	case IB_WR_SEND_WITH_IMM:
3467 	case IB_WR_SEND:
3468 	case IB_WR_SEND_WITH_INV:
3469 		return IB_WC_SEND;
3470 	case IB_WR_RDMA_READ:
3471 	case IB_WR_RDMA_READ_WITH_INV:
3472 		return IB_WC_RDMA_READ;
3473 	case IB_WR_ATOMIC_CMP_AND_SWP:
3474 		return IB_WC_COMP_SWAP;
3475 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3476 		return IB_WC_FETCH_ADD;
3477 	case IB_WR_REG_MR:
3478 		return IB_WC_REG_MR;
3479 	case IB_WR_LOCAL_INV:
3480 		return IB_WC_LOCAL_INV;
3481 	default:
3482 		return IB_WC_SEND;
3483 	}
3484 }
3485 
3486 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3487 				      const struct ib_send_wr *wr)
3488 {
3489 	int wq_is_full, err_wr, pbl_is_full;
3490 	struct qedr_dev *dev = qp->dev;
3491 
3492 	/* prevent SQ overflow and/or processing of a bad WR */
3493 	err_wr = wr->num_sge > qp->sq.max_sges;
3494 	wq_is_full = qedr_wq_is_full(&qp->sq);
3495 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3496 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3497 	if (wq_is_full || err_wr || pbl_is_full) {
3498 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3499 			DP_ERR(dev,
3500 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3501 			       qp);
3502 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3503 		}
3504 
3505 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3506 			DP_ERR(dev,
3507 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3508 			       qp);
3509 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3510 		}
3511 
3512 		if (pbl_is_full &&
3513 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3514 			DP_ERR(dev,
3515 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3516 			       qp);
3517 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3518 		}
3519 		return false;
3520 	}
3521 	return true;
3522 }
3523 
3524 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3525 			    const struct ib_send_wr **bad_wr)
3526 {
3527 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3528 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3529 	struct rdma_sq_atomic_wqe_1st *awqe1;
3530 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3531 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3532 	struct rdma_sq_send_wqe_2st *swqe2;
3533 	struct rdma_sq_local_inv_wqe *iwqe;
3534 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3535 	struct rdma_sq_send_wqe_1st *swqe;
3536 	struct rdma_sq_rdma_wqe_1st *rwqe;
3537 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3538 	struct rdma_sq_common_wqe *wqe;
3539 	u32 length;
3540 	int rc = 0;
3541 	bool comp;
3542 
3543 	if (!qedr_can_post_send(qp, wr)) {
3544 		*bad_wr = wr;
3545 		return -ENOMEM;
3546 	}
3547 
3548 	wqe = qed_chain_produce(&qp->sq.pbl);
3549 	qp->wqe_wr_id[qp->sq.prod].signaled =
3550 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3551 
3552 	wqe->flags = 0;
3553 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3554 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3555 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3556 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3557 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3558 		   !!(wr->send_flags & IB_SEND_FENCE));
3559 	wqe->prev_wqe_size = qp->prev_wqe_size;
3560 
3561 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3562 
3563 	switch (wr->opcode) {
3564 	case IB_WR_SEND_WITH_IMM:
3565 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3566 			rc = -EINVAL;
3567 			*bad_wr = wr;
3568 			break;
3569 		}
3570 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3571 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3572 		swqe->wqe_size = 2;
3573 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3574 
3575 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3576 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3577 						   wr, bad_wr);
3578 		swqe->length = cpu_to_le32(length);
3579 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3580 		qp->prev_wqe_size = swqe->wqe_size;
3581 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3582 		break;
3583 	case IB_WR_SEND:
3584 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3585 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3586 
3587 		swqe->wqe_size = 2;
3588 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3589 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3590 						   wr, bad_wr);
3591 		swqe->length = cpu_to_le32(length);
3592 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3593 		qp->prev_wqe_size = swqe->wqe_size;
3594 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3595 		break;
3596 	case IB_WR_SEND_WITH_INV:
3597 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3598 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3599 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3600 		swqe->wqe_size = 2;
3601 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3602 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3603 						   wr, bad_wr);
3604 		swqe->length = cpu_to_le32(length);
3605 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3606 		qp->prev_wqe_size = swqe->wqe_size;
3607 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3608 		break;
3609 
3610 	case IB_WR_RDMA_WRITE_WITH_IMM:
3611 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3612 			rc = -EINVAL;
3613 			*bad_wr = wr;
3614 			break;
3615 		}
3616 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3617 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3618 
3619 		rwqe->wqe_size = 2;
3620 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3621 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3622 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3623 						   wr, bad_wr);
3624 		rwqe->length = cpu_to_le32(length);
3625 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3626 		qp->prev_wqe_size = rwqe->wqe_size;
3627 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3628 		break;
3629 	case IB_WR_RDMA_WRITE:
3630 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3631 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3632 
3633 		rwqe->wqe_size = 2;
3634 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3635 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3636 						   wr, bad_wr);
3637 		rwqe->length = cpu_to_le32(length);
3638 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3639 		qp->prev_wqe_size = rwqe->wqe_size;
3640 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3641 		break;
3642 	case IB_WR_RDMA_READ_WITH_INV:
3643 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3644 		fallthrough;	/* handled identically to RDMA READ */
3645 
3646 	case IB_WR_RDMA_READ:
3647 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3648 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3649 
3650 		rwqe->wqe_size = 2;
3651 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3652 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3653 						   wr, bad_wr);
3654 		rwqe->length = cpu_to_le32(length);
3655 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3656 		qp->prev_wqe_size = rwqe->wqe_size;
3657 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3658 		break;
3659 
3660 	case IB_WR_ATOMIC_CMP_AND_SWP:
3661 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3662 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3663 		awqe1->wqe_size = 4;
3664 
3665 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3666 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3667 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3668 
3669 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3670 
3671 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3672 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3673 			DMA_REGPAIR_LE(awqe3->swap_data,
3674 				       atomic_wr(wr)->compare_add);
3675 		} else {
3676 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3677 			DMA_REGPAIR_LE(awqe3->swap_data,
3678 				       atomic_wr(wr)->swap);
3679 			DMA_REGPAIR_LE(awqe3->cmp_data,
3680 				       atomic_wr(wr)->compare_add);
3681 		}
3682 
3683 		qedr_prepare_sq_sges(qp, NULL, wr);
3684 
3685 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3686 		qp->prev_wqe_size = awqe1->wqe_size;
3687 		break;
3688 
3689 	case IB_WR_LOCAL_INV:
3690 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3691 		iwqe->wqe_size = 1;
3692 
3693 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3694 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3695 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3696 		qp->prev_wqe_size = iwqe->wqe_size;
3697 		break;
3698 	case IB_WR_REG_MR:
3699 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3700 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3701 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3702 		fwqe1->wqe_size = 2;
3703 
3704 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3705 		if (rc) {
3706 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3707 			*bad_wr = wr;
3708 			break;
3709 		}
3710 
3711 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3712 		qp->prev_wqe_size = fwqe1->wqe_size;
3713 		break;
3714 	default:
3715 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3716 		rc = -EINVAL;
3717 		*bad_wr = wr;
3718 		break;
3719 	}
3720 
3721 	if (*bad_wr) {
3722 		u16 value;
3723 
3724 		/* Restore prod to its position before
3725 		 * this WR was processed
3726 		 */
3727 		value = le16_to_cpu(qp->sq.db_data.data.value);
3728 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3729 
3730 		/* Restore prev_wqe_size */
3731 		qp->prev_wqe_size = wqe->prev_wqe_size;
3732 		rc = -EINVAL;
3733 		DP_ERR(dev, "POST SEND FAILED\n");
3734 	}
3735 
3736 	return rc;
3737 }
3738 
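/* Verbs post_send entry point.  GSI QPs are handed to qedr_gsi_post_send();
 * otherwise, for RoCE, the QP must be in RTS, SQD or ERR state.  Each WR is
 * built under qp->q_lock and the SQ doorbell is rung once after the whole
 * list has been processed.
 */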
3739 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3740 		   const struct ib_send_wr **bad_wr)
3741 {
3742 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3743 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3744 	unsigned long flags;
3745 	int rc = 0;
3746 
3747 	*bad_wr = NULL;
3748 
3749 	if (qp->qp_type == IB_QPT_GSI)
3750 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3751 
3752 	spin_lock_irqsave(&qp->q_lock, flags);
3753 
3754 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3755 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3756 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3757 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3758 			spin_unlock_irqrestore(&qp->q_lock, flags);
3759 			*bad_wr = wr;
3760 			DP_DEBUG(dev, QEDR_MSG_CQ,
3761 				 "QP in wrong state! QP icid=0x%x state %d\n",
3762 				 qp->icid, qp->state);
3763 			return -EINVAL;
3764 		}
3765 	}
3766 
3767 	while (wr) {
3768 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3769 		if (rc)
3770 			break;
3771 
3772 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3773 
3774 		qedr_inc_sw_prod(&qp->sq);
3775 
3776 		qp->sq.db_data.data.value++;
3777 
3778 		wr = wr->next;
3779 	}
3780 
3781 	/* Trigger the doorbell.
3782 	 * If there was a failure in the first WR then it will be rung in
3783 	 * vain. However, this is not harmful (as long as the producer value
3784 	 * is unchanged). For performance reasons we avoid checking for this
3785 	 * redundant doorbell.
3786 	 *
3787 	 * qp->wqe_wr_id is accessed during qedr_poll_cq; as
3788 	 * soon as we ring the doorbell, we could get a completion
3789 	 * for this WR, therefore we need to make sure that the
3790 	 * memory is updated before ringing the doorbell.
3791 	 * During qedr_poll_cq, an rmb is called before accessing the
3792 	 * CQE. This covers the smp_rmb as well.
3793 	 */
3794 	smp_wmb();
3795 	writel(qp->sq.db_data.raw, qp->sq.db);
3796 
3797 	spin_unlock_irqrestore(&qp->q_lock, flags);
3798 
3799 	return rc;
3800 }
3801 
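/* Return the number of free WQE slots in the SRQ ring. */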
3802 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3803 {
3804 	u32 used;
3805 
3806 	/* Calculate the number of elements in use from the producer
3807 	 * and consumer counts, and subtract it from the maximum number
3808 	 * of work requests supported to get the number of elements left.
3809 	 */
3810 	used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
3811 
3812 	return hw_srq->max_wr - used;
3813 }
3814 
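/* Post receive WRs to an SRQ.  Each WR consumes one header element plus one
 * element per SGE from the SRQ PBL.  The SGE and WQE producer values are then
 * written to the FW-visible producer pair, with dma_wmb() ensuring the WQE
 * contents and the SGE producer are visible first.
 */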
3815 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3816 		       const struct ib_recv_wr **bad_wr)
3817 {
3818 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3819 	struct qedr_srq_hwq_info *hw_srq;
3820 	struct qedr_dev *dev = srq->dev;
3821 	struct qed_chain *pbl;
3822 	unsigned long flags;
3823 	int status = 0;
3824 	u32 num_sge;
3825 
3826 	spin_lock_irqsave(&srq->lock, flags);
3827 
3828 	hw_srq = &srq->hw_srq;
3829 	pbl = &srq->hw_srq.pbl;
3830 	while (wr) {
3831 		struct rdma_srq_wqe_header *hdr;
3832 		int i;
3833 
3834 		if (!qedr_srq_elem_left(hw_srq) ||
3835 		    wr->num_sge > srq->hw_srq.max_sges) {
3836 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3837 			       hw_srq->wr_prod_cnt,
3838 			       atomic_read(&hw_srq->wr_cons_cnt),
3839 			       wr->num_sge, srq->hw_srq.max_sges);
3840 			status = -ENOMEM;
3841 			*bad_wr = wr;
3842 			break;
3843 		}
3844 
3845 		hdr = qed_chain_produce(pbl);
3846 		num_sge = wr->num_sge;
3847 		/* Set the number of SGEs and the work request id in the header */
3848 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3849 
3850 		srq->hw_srq.wr_prod_cnt++;
3851 		hw_srq->wqe_prod++;
3852 		hw_srq->sge_prod++;
3853 
3854 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3855 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3856 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3857 
3858 		for (i = 0; i < wr->num_sge; i++) {
3859 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3860 
3861 			/* Set SGE length, lkey and address */
3862 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3863 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3864 
3865 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3866 				 "[%d]: len %d key %x addr %x:%x\n",
3867 				 i, srq_sge->length, srq_sge->l_key,
3868 				 srq_sge->addr.hi, srq_sge->addr.lo);
3869 			hw_srq->sge_prod++;
3870 		}
3871 
3872 		/* Update WQE and SGE information before
3873 		 * updating producer.
3874 		 */
3875 		dma_wmb();
3876 
3877 		/* The SRQ producer is 8 bytes: the SGE producer index is in
3878 		 * the first 4 bytes and the WQE producer is in the next
3879 		 * 4 bytes.
3880 		 */
3881 		srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod);
3882 		/* Make sure sge producer is updated first */
3883 		dma_wmb();
3884 		srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod);
3885 
3886 		wr = wr->next;
3887 	}
3888 
3889 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3890 		 qed_chain_get_elem_left(pbl));
3891 	spin_unlock_irqrestore(&srq->lock, flags);
3892 
3893 	return status;
3894 }
3895 
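/* Post receive WRs to a regular RQ.  Each WR consumes one RQ element per SGE
 * (or a single zero-length SGE when no SGEs are given), and the RQ doorbell
 * is rung per WR.  On iWARP a second doorbell is rung as well.
 */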
3896 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3897 		   const struct ib_recv_wr **bad_wr)
3898 {
3899 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3900 	struct qedr_dev *dev = qp->dev;
3901 	unsigned long flags;
3902 	int status = 0;
3903 
3904 	if (qp->qp_type == IB_QPT_GSI)
3905 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3906 
3907 	spin_lock_irqsave(&qp->q_lock, flags);
3908 
3909 	while (wr) {
3910 		int i;
3911 
3912 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3913 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3914 		    wr->num_sge > qp->rq.max_sges) {
3915 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3916 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3917 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3918 			       qp->rq.max_sges);
3919 			status = -ENOMEM;
3920 			*bad_wr = wr;
3921 			break;
3922 		}
3923 		for (i = 0; i < wr->num_sge; i++) {
3924 			u32 flags = 0;
3925 			struct rdma_rq_sge *rqe =
3926 			    qed_chain_produce(&qp->rq.pbl);
3927 
3928 			/* The first SGE must include the number
3929 			 * of SGEs in the list
3930 			 */
3931 			if (!i)
3932 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3933 					  wr->num_sge);
3934 
3935 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3936 				  wr->sg_list[i].lkey);
3937 
3938 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3939 				   wr->sg_list[i].length, flags);
3940 		}
3941 
3942 		/* Special case of no SGEs. The FW requires between 1 and 4
3943 		 * SGEs, so in this case we post one SGE with length zero. This
3944 		 * is because an RDMA write with immediate consumes an RQ entry.
3945 		 */
3946 		if (!wr->num_sge) {
3947 			u32 flags = 0;
3948 			struct rdma_rq_sge *rqe =
3949 			    qed_chain_produce(&qp->rq.pbl);
3950 
3951 			/* The first SGE must include the number
3952 			 * of SGEs in the list
3953 			 */
3954 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3955 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3956 
3957 			RQ_SGE_SET(rqe, 0, 0, flags);
3958 			i = 1;
3959 		}
3960 
3961 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3962 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3963 
3964 		qedr_inc_sw_prod(&qp->rq);
3965 
3966 		/* qp->rqe_wr_id is accessed during qedr_poll_cq; as
3967 		 * soon as we ring the doorbell, we could get a completion
3968 		 * for this WR, therefore we need to make sure that the
3969 		 * memory is updated before ringing the doorbell.
3970 		 * During qedr_poll_cq, an rmb is called before accessing the
3971 		 * CQE. This covers the smp_rmb as well.
3972 		 */
3973 		smp_wmb();
3974 
3975 		qp->rq.db_data.data.value++;
3976 
3977 		writel(qp->rq.db_data.raw, qp->rq.db);
3978 
3979 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
3980 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3982 
3983 		wr = wr->next;
3984 	}
3985 
3986 	spin_unlock_irqrestore(&qp->q_lock, flags);
3987 
3988 	return status;
3989 }
3990 
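/* A CQE is valid (needs processing) when its toggle bit matches the CQ's
 * current PBL toggle value.
 */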
3991 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3992 {
3993 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3994 
3995 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3996 		cq->pbl_toggle;
3997 }
3998 
3999 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
4000 {
4001 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
4002 	struct qedr_qp *qp;
4003 
4004 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
4005 						   resp_cqe->qp_handle.lo,
4006 						   u64);
4007 	return qp;
4008 }
4009 
4010 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
4011 {
4012 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
4013 
4014 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
4015 }
4016 
4017 /* Return latest CQE (needs processing) */
4018 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
4019 {
4020 	return cq->latest_cqe;
4021 }
4022 
4023 /* For FMR we need to increase the completed counter used by the FMR
4024  * algorithm to determine whether a PBL can be freed or not.
4025  * We need to do this whether or not the work request was signaled. For
4026  * this purpose we call this function from the condition that checks if a WR
4027  * should be skipped, to make sure we don't miss it (possibly this FMR
4028  * operation was not signaled).
4029  */
4030 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
4031 {
4032 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
4033 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
4034 }
4035 
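/* Flush SQ completions up to @hw_cons.  Unsignaled WRs are skipped unless
 * @force is set; returns the number of WCs written to @wc.
 */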
4036 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
4037 		       struct qedr_cq *cq, int num_entries,
4038 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
4039 		       int force)
4040 {
4041 	u16 cnt = 0;
4042 
4043 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
4044 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
4045 			qedr_chk_if_fmr(qp);
4046 			/* skip WC */
4047 			goto next_cqe;
4048 		}
4049 
4050 		/* fill WC */
4051 		wc->status = status;
4052 		wc->vendor_err = 0;
4053 		wc->wc_flags = 0;
4054 		wc->src_qp = qp->id;
4055 		wc->qp = &qp->ibqp;
4056 
4057 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
4058 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
4059 
4060 		switch (wc->opcode) {
4061 		case IB_WC_RDMA_WRITE:
4062 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
4063 			break;
4064 		case IB_WC_COMP_SWAP:
4065 		case IB_WC_FETCH_ADD:
4066 			wc->byte_len = 8;
4067 			break;
4068 		case IB_WC_REG_MR:
4069 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
4070 			break;
4071 		case IB_WC_RDMA_READ:
4072 		case IB_WC_SEND:
4073 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
4074 			break;
4075 		default:
4076 			break;
4077 		}
4078 
4079 		num_entries--;
4080 		wc++;
4081 		cnt++;
4082 next_cqe:
4083 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
4084 			qed_chain_consume(&qp->sq.pbl);
4085 		qedr_inc_sw_cons(&qp->sq);
4086 	}
4087 
4088 	return cnt;
4089 }
4090 
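/* Process a requester CQE.  On success or flush, completions are generated
 * up to the CQE's sq_cons.  For any other status, the preceding WQEs are
 * completed as successful and one extra WC (if room is left) reports the
 * failing WQE with the mapped IB error status.
 */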
4091 static int qedr_poll_cq_req(struct qedr_dev *dev,
4092 			    struct qedr_qp *qp, struct qedr_cq *cq,
4093 			    int num_entries, struct ib_wc *wc,
4094 			    struct rdma_cqe_requester *req)
4095 {
4096 	int cnt = 0;
4097 
4098 	switch (req->status) {
4099 	case RDMA_CQE_REQ_STS_OK:
4100 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4101 				  IB_WC_SUCCESS, 0);
4102 		break;
4103 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
4104 		if (qp->state != QED_ROCE_QP_STATE_ERR)
4105 			DP_DEBUG(dev, QEDR_MSG_CQ,
4106 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4107 				 cq->icid, qp->icid);
4108 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4109 				  IB_WC_WR_FLUSH_ERR, 1);
4110 		break;
4111 	default:
4112 		/* process all WQEs before the consumer */
4113 		qp->state = QED_ROCE_QP_STATE_ERR;
4114 		cnt = process_req(dev, qp, cq, num_entries, wc,
4115 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
4116 		wc += cnt;
4117 		/* if we have an extra WC, fill it with the actual error info */
4118 		if (cnt < num_entries) {
4119 			enum ib_wc_status wc_status;
4120 
4121 			switch (req->status) {
4122 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4123 				DP_ERR(dev,
4124 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4125 				       cq->icid, qp->icid);
4126 				wc_status = IB_WC_BAD_RESP_ERR;
4127 				break;
4128 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4129 				DP_ERR(dev,
4130 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4131 				       cq->icid, qp->icid);
4132 				wc_status = IB_WC_LOC_LEN_ERR;
4133 				break;
4134 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4135 				DP_ERR(dev,
4136 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4137 				       cq->icid, qp->icid);
4138 				wc_status = IB_WC_LOC_QP_OP_ERR;
4139 				break;
4140 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4141 				DP_ERR(dev,
4142 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4143 				       cq->icid, qp->icid);
4144 				wc_status = IB_WC_LOC_PROT_ERR;
4145 				break;
4146 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4147 				DP_ERR(dev,
4148 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4149 				       cq->icid, qp->icid);
4150 				wc_status = IB_WC_MW_BIND_ERR;
4151 				break;
4152 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4153 				DP_ERR(dev,
4154 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4155 				       cq->icid, qp->icid);
4156 				wc_status = IB_WC_REM_INV_REQ_ERR;
4157 				break;
4158 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4159 				DP_ERR(dev,
4160 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4161 				       cq->icid, qp->icid);
4162 				wc_status = IB_WC_REM_ACCESS_ERR;
4163 				break;
4164 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4165 				DP_ERR(dev,
4166 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4167 				       cq->icid, qp->icid);
4168 				wc_status = IB_WC_REM_OP_ERR;
4169 				break;
4170 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4171 				DP_ERR(dev,
4172 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4173 				       cq->icid, qp->icid);
4174 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4175 				break;
4176 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4177 				DP_ERR(dev,
4178 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4179 				       cq->icid, qp->icid);
4180 				wc_status = IB_WC_RETRY_EXC_ERR;
4181 				break;
4182 			default:
4183 				DP_ERR(dev,
4184 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4185 				       cq->icid, qp->icid);
4186 				wc_status = IB_WC_GENERAL_ERR;
4187 			}
4188 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4189 					   wc_status, 1);
4190 		}
4191 	}
4192 
4193 	return cnt;
4194 }
4195 
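/* Map a FW responder CQE status to an IB work completion status. */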
4196 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4197 {
4198 	switch (status) {
4199 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4200 		return IB_WC_LOC_ACCESS_ERR;
4201 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4202 		return IB_WC_LOC_LEN_ERR;
4203 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4204 		return IB_WC_LOC_QP_OP_ERR;
4205 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4206 		return IB_WC_LOC_PROT_ERR;
4207 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4208 		return IB_WC_MW_BIND_ERR;
4209 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4210 		return IB_WC_REM_INV_RD_REQ_ERR;
4211 	case RDMA_CQE_RESP_STS_OK:
4212 		return IB_WC_SUCCESS;
4213 	default:
4214 		return IB_WC_GENERAL_ERR;
4215 	}
4216 }
4217 
4218 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4219 					  struct ib_wc *wc)
4220 {
4221 	wc->status = IB_WC_SUCCESS;
4222 	wc->byte_len = le32_to_cpu(resp->length);
4223 
4224 	if (resp->flags & QEDR_RESP_IMM) {
4225 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4226 		wc->wc_flags |= IB_WC_WITH_IMM;
4227 
4228 		if (resp->flags & QEDR_RESP_RDMA)
4229 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4230 
4231 		if (resp->flags & QEDR_RESP_INV)
4232 			return -EINVAL;
4233 
4234 	} else if (resp->flags & QEDR_RESP_INV) {
4235 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4236 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4237 
4238 		if (resp->flags & QEDR_RESP_RDMA)
4239 			return -EINVAL;
4240 
4241 	} else if (resp->flags & QEDR_RESP_RDMA) {
4242 		return -EINVAL;
4243 	}
4244 
4245 	return 0;
4246 }
4247 
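/* Fill a receive work completion from a responder CQE: opcode, immediate or
 * invalidate data, byte length and status.
 */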
4248 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4249 			       struct qedr_cq *cq, struct ib_wc *wc,
4250 			       struct rdma_cqe_responder *resp, u64 wr_id)
4251 {
4252 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4253 	wc->opcode = IB_WC_RECV;
4254 	wc->wc_flags = 0;
4255 
4256 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4257 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4258 			DP_ERR(dev,
4259 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4260 			       cq, cq->icid, resp->flags);
4261 
4262 	} else {
4263 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4264 		if (wc->status == IB_WC_GENERAL_ERR)
4265 			DP_ERR(dev,
4266 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4267 			       cq, cq->icid, resp->status);
4268 	}
4269 
4270 	/* Fill the rest of the WC */
4271 	wc->vendor_err = 0;
4272 	wc->src_qp = qp->id;
4273 	wc->qp = &qp->ibqp;
4274 	wc->wr_id = wr_id;
4275 }
4276 
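/* SRQ responder CQE: the wr_id is carried in the CQE itself (srq_wr_id)
 * rather than taken from a driver-side ring; flushed CQEs are reported as
 * IB_WC_WR_FLUSH_ERR.
 */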
4277 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4278 				struct qedr_cq *cq, struct ib_wc *wc,
4279 				struct rdma_cqe_responder *resp)
4280 {
4281 	struct qedr_srq *srq = qp->srq;
4282 	u64 wr_id;
4283 
4284 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4285 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4286 
4287 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4288 		wc->status = IB_WC_WR_FLUSH_ERR;
4289 		wc->vendor_err = 0;
4290 		wc->wr_id = wr_id;
4291 		wc->byte_len = 0;
4292 		wc->src_qp = qp->id;
4293 		wc->qp = &qp->ibqp;
4295 	} else {
4296 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4297 	}
4298 	atomic_inc(&srq->hw_srq.wr_cons_cnt);
4299 
4300 	return 1;
4301 }

4302 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4303 			    struct qedr_cq *cq, struct ib_wc *wc,
4304 			    struct rdma_cqe_responder *resp)
4305 {
4306 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4307 
4308 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4309 
4310 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4311 		qed_chain_consume(&qp->rq.pbl);
4312 	qedr_inc_sw_cons(&qp->rq);
4313 
4314 	return 1;
4315 }
4316 
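/* Generate IB_WC_WR_FLUSH_ERR completions for RQ WQEs up to @hw_cons. */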
4317 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4318 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4319 {
4320 	u16 cnt = 0;
4321 
4322 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4323 		/* fill WC */
4324 		wc->status = IB_WC_WR_FLUSH_ERR;
4325 		wc->vendor_err = 0;
4326 		wc->wc_flags = 0;
4327 		wc->src_qp = qp->id;
4328 		wc->byte_len = 0;
4329 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4330 		wc->qp = &qp->ibqp;
4331 		num_entries--;
4332 		wc++;
4333 		cnt++;
4334 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4335 			qed_chain_consume(&qp->rq.pbl);
4336 		qedr_inc_sw_cons(&qp->rq);
4337 	}
4338 
4339 	return cnt;
4340 }
4341 
4342 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4343 				 struct rdma_cqe_responder *resp, int *update)
4344 {
4345 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4346 		consume_cqe(cq);
4347 		*update |= 1;
4348 	}
4349 }
4350 
4351 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4352 				 struct qedr_cq *cq, int num_entries,
4353 				 struct ib_wc *wc,
4354 				 struct rdma_cqe_responder *resp)
4355 {
4356 	int cnt;
4357 
4358 	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4359 	consume_cqe(cq);
4360 
4361 	return cnt;
4362 }
4363 
4364 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4365 			     struct qedr_cq *cq, int num_entries,
4366 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4367 			     int *update)
4368 {
4369 	int cnt;
4370 
4371 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4372 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4373 					 resp->rq_cons_or_srq_id);
4374 		try_consume_resp_cqe(cq, qp, resp, update);
4375 	} else {
4376 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4377 		consume_cqe(cq);
4378 		*update |= 1;
4379 	}
4380 
4381 	return cnt;
4382 }
4383 
4384 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4385 				struct rdma_cqe_requester *req, int *update)
4386 {
4387 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4388 		consume_cqe(cq);
4389 		*update |= 1;
4390 	}
4391 }
4392 
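/* Poll the CQ: walk CQEs that are valid per the toggle bit, dispatch them by
 * type (requester, responder RQ, responder SRQ), advance cq_cons by the
 * number of chain elements consumed and ring the CQ doorbell once if any CQE
 * was consumed.  GSI CQs are handled by qedr_gsi_poll_cq().
 */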
4393 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4394 {
4395 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4396 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4397 	union rdma_cqe *cqe;
4398 	u32 old_cons, new_cons;
4399 	unsigned long flags;
4400 	int update = 0;
4401 	int done = 0;
4402 
4403 	if (cq->destroyed) {
4404 		DP_ERR(dev,
4405 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4406 		       cq, cq->icid);
4407 		return 0;
4408 	}
4409 
4410 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4411 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4412 
4413 	spin_lock_irqsave(&cq->cq_lock, flags);
4414 	cqe = cq->latest_cqe;
4415 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4416 	while (num_entries && is_valid_cqe(cq, cqe)) {
4417 		struct qedr_qp *qp;
4418 		int cnt = 0;
4419 
4420 		/* prevent speculative reads of any field of CQE */
4421 		rmb();
4422 
4423 		qp = cqe_get_qp(cqe);
4424 		if (!qp) {
4425 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4426 			break;
4427 		}
4428 
4429 		wc->qp = &qp->ibqp;
4430 
4431 		switch (cqe_get_type(cqe)) {
4432 		case RDMA_CQE_TYPE_REQUESTER:
4433 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4434 					       &cqe->req);
4435 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4436 			break;
4437 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4438 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4439 						&cqe->resp, &update);
4440 			break;
4441 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4442 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4443 						    wc, &cqe->resp);
4444 			update = 1;
4445 			break;
4446 		case RDMA_CQE_TYPE_INVALID:
4447 		default:
4448 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4449 			       cqe_get_type(cqe));
4450 		}
4451 		num_entries -= cnt;
4452 		wc += cnt;
4453 		done += cnt;
4454 
4455 		cqe = get_cqe(cq);
4456 	}
4457 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4458 
4459 	cq->cq_cons += new_cons - old_cons;
4460 
4461 	if (update)
4462 		/* The doorbell notifies about the latest VALID entry,
4463 		 * but the chain already points to the next INVALID one
4464 		 */
4465 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4466 
4467 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4468 	return done;
4469 }
4470 
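/* MAD processing is a no-op for qedr; all MADs are reported as handled
 * successfully.
 */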
4471 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4472 		     u32 port_num, const struct ib_wc *in_wc,
4473 		     const struct ib_grh *in_grh, const struct ib_mad *in,
4474 		     struct ib_mad *out_mad, size_t *out_mad_size,
4475 		     u16 *out_mad_pkey_index)
4476 {
4477 	return IB_MAD_RESULT_SUCCESS;
4478 }
4479