xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision dd21bfa4)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
55 
56 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
57 #define	RDMA_MAX_SGE_PER_SRQ	(4)
58 #define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
59 
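/* Note: DB_ADDR_SHIFT() converts a PWM doorbell offset (e.g.
 * DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT) into the byte offset of that doorbell
 * within a doorbell page by shifting it by DB_PWM_ADDR_OFFSET_SHIFT; the
 * result is added to the DPI base (user space) or to dev->db_addr (kernel).
 */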
60 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
61 
62 enum {
63 	QEDR_USER_MMAP_IO_WC = 0,
64 	QEDR_USER_MMAP_PHYS_PAGE,
65 };
66 
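/* Copy at most udata->outlen bytes of the response back to user space, so
 * that older user libraries that expect a shorter response struct still work
 * (the copy is simply truncated to what the caller asked for).
 */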
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
68 					size_t len)
69 {
70 	size_t min_len = min_t(size_t, len, udata->outlen);
71 
72 	return ib_copy_to_udata(udata, src, min_len);
73 }
74 
75 int qedr_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
76 {
77 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
78 		return -EINVAL;
79 
80 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
81 	return 0;
82 }
83 
84 int qedr_iw_query_gid(struct ib_device *ibdev, u32 port,
85 		      int index, union ib_gid *sgid)
86 {
87 	struct qedr_dev *dev = get_qedr_dev(ibdev);
88 
89 	memset(sgid->raw, 0, sizeof(sgid->raw));
90 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
91 
92 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 		 sgid->global.interface_id, sgid->global.subnet_prefix);
94 
95 	return 0;
96 }
97 
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
99 {
100 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 	struct qedr_device_attr *qattr = &dev->attr;
102 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
103 
104 	srq_attr->srq_limit = srq->srq_limit;
105 	srq_attr->max_wr = qattr->max_srq_wr;
106 	srq_attr->max_sge = qattr->max_sge;
107 
108 	return 0;
109 }
110 
111 int qedr_query_device(struct ib_device *ibdev,
112 		      struct ib_device_attr *attr, struct ib_udata *udata)
113 {
114 	struct qedr_dev *dev = get_qedr_dev(ibdev);
115 	struct qedr_device_attr *qattr = &dev->attr;
116 
117 	if (!dev->rdma_ctx) {
118 		DP_ERR(dev,
119 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
120 		       dev->rdma_ctx);
121 		return -EINVAL;
122 	}
123 
124 	memset(attr, 0, sizeof(*attr));
125 
126 	attr->fw_ver = qattr->fw_ver;
127 	attr->sys_image_guid = qattr->sys_image_guid;
128 	attr->max_mr_size = qattr->max_mr_size;
129 	attr->page_size_cap = qattr->page_size_caps;
130 	attr->vendor_id = qattr->vendor_id;
131 	attr->vendor_part_id = qattr->vendor_part_id;
132 	attr->hw_ver = qattr->hw_ver;
133 	attr->max_qp = qattr->max_qp;
134 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 	    IB_DEVICE_RC_RNR_NAK_GEN |
137 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
138 
139 	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
140 		attr->device_cap_flags |= IB_DEVICE_XRC;
141 	attr->max_send_sge = qattr->max_sge;
142 	attr->max_recv_sge = qattr->max_sge;
143 	attr->max_sge_rd = qattr->max_sge;
144 	attr->max_cq = qattr->max_cq;
145 	attr->max_cqe = qattr->max_cqe;
146 	attr->max_mr = qattr->max_mr;
147 	attr->max_mw = qattr->max_mw;
148 	attr->max_pd = qattr->max_pd;
149 	attr->atomic_cap = dev->atomic_cap;
150 	attr->max_qp_init_rd_atom =
151 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
152 	attr->max_qp_rd_atom =
153 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
154 		attr->max_qp_init_rd_atom);
155 
156 	attr->max_srq = qattr->max_srq;
157 	attr->max_srq_sge = qattr->max_srq_sge;
158 	attr->max_srq_wr = qattr->max_srq_wr;
159 
160 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
161 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
162 	attr->max_pkeys = qattr->max_pkey;
163 	attr->max_ah = qattr->max_ah;
164 
165 	return 0;
166 }
167 
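/* Roughly map an Ethernet link speed (in Mb/s, as reported by qed) onto the
 * closest IB speed/width pair, e.g. 100000 -> EDR x4 (~100 Gb/s) and
 * 25000 -> EDR x1 (~25 Gb/s). Unknown speeds fall back to SDR x1.
 */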
168 static inline void get_link_speed_and_width(int speed, u16 *ib_speed,
169 					    u8 *ib_width)
170 {
171 	switch (speed) {
172 	case 1000:
173 		*ib_speed = IB_SPEED_SDR;
174 		*ib_width = IB_WIDTH_1X;
175 		break;
176 	case 10000:
177 		*ib_speed = IB_SPEED_QDR;
178 		*ib_width = IB_WIDTH_1X;
179 		break;
180 
181 	case 20000:
182 		*ib_speed = IB_SPEED_DDR;
183 		*ib_width = IB_WIDTH_4X;
184 		break;
185 
186 	case 25000:
187 		*ib_speed = IB_SPEED_EDR;
188 		*ib_width = IB_WIDTH_1X;
189 		break;
190 
191 	case 40000:
192 		*ib_speed = IB_SPEED_QDR;
193 		*ib_width = IB_WIDTH_4X;
194 		break;
195 
196 	case 50000:
197 		*ib_speed = IB_SPEED_HDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 100000:
202 		*ib_speed = IB_SPEED_EDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	default:
207 		/* Unsupported */
208 		*ib_speed = IB_SPEED_SDR;
209 		*ib_width = IB_WIDTH_1X;
210 	}
211 }
212 
213 int qedr_query_port(struct ib_device *ibdev, u32 port,
214 		    struct ib_port_attr *attr)
215 {
216 	struct qedr_dev *dev;
217 	struct qed_rdma_port *rdma_port;
218 
219 	dev = get_qedr_dev(ibdev);
220 
221 	if (!dev->rdma_ctx) {
222 		DP_ERR(dev, "rdma_ctx is NULL\n");
223 		return -EINVAL;
224 	}
225 
226 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
227 
228 	/* *attr is zeroed by the caller; avoid zeroing it here */
229 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
230 		attr->state = IB_PORT_ACTIVE;
231 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
232 	} else {
233 		attr->state = IB_PORT_DOWN;
234 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
235 	}
236 	attr->max_mtu = IB_MTU_4096;
237 	attr->lid = 0;
238 	attr->lmc = 0;
239 	attr->sm_lid = 0;
240 	attr->sm_sl = 0;
241 	attr->ip_gids = true;
242 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
243 		attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu);
244 		attr->gid_tbl_len = 1;
245 	} else {
246 		attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
247 		attr->gid_tbl_len = QEDR_MAX_SGID;
248 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
249 	}
250 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
251 	attr->qkey_viol_cntr = 0;
252 	get_link_speed_and_width(rdma_port->link_speed,
253 				 &attr->active_speed, &attr->active_width);
254 	attr->max_msg_sz = rdma_port->max_msg_size;
255 	attr->max_vl_num = 4;
256 
257 	return 0;
258 }
259 
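/* Allocate a per-process ucontext: reserve a DPI (doorbell page) via qed,
 * expose it to user space through an mmap entry of type QEDR_USER_MMAP_IO_WC,
 * and report the supported doorbell push (DPM) modes and device limits in the
 * response. The exact DPM flags depend on the transport (RoCE vs iWARP) and
 * on whether user DPM is enabled.
 */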
260 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
261 {
262 	struct ib_device *ibdev = uctx->device;
263 	int rc;
264 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
265 	struct qedr_alloc_ucontext_resp uresp = {};
266 	struct qedr_alloc_ucontext_req ureq = {};
267 	struct qedr_dev *dev = get_qedr_dev(ibdev);
268 	struct qed_rdma_add_user_out_params oparams;
269 	struct qedr_user_mmap_entry *entry;
270 
271 	if (!udata)
272 		return -EFAULT;
273 
274 	if (udata->inlen) {
275 		rc = ib_copy_from_udata(&ureq, udata,
276 					min(sizeof(ureq), udata->inlen));
277 		if (rc) {
278 			DP_ERR(dev, "Problem copying data from user space\n");
279 			return -EFAULT;
280 		}
281 		ctx->edpm_mode = !!(ureq.context_flags &
282 				    QEDR_ALLOC_UCTX_EDPM_MODE);
283 		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
284 	}
285 
286 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
287 	if (rc) {
288 		DP_ERR(dev,
289 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
290 		       rc);
291 		return rc;
292 	}
293 
294 	ctx->dpi = oparams.dpi;
295 	ctx->dpi_addr = oparams.dpi_addr;
296 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
297 	ctx->dpi_size = oparams.dpi_size;
298 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
299 	if (!entry) {
300 		rc = -ENOMEM;
301 		goto err;
302 	}
303 
304 	entry->io_address = ctx->dpi_phys_addr;
305 	entry->length = ctx->dpi_size;
306 	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
307 	entry->dpi = ctx->dpi;
308 	entry->dev = dev;
309 	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
310 					 ctx->dpi_size);
311 	if (rc) {
312 		kfree(entry);
313 		goto err;
314 	}
315 	ctx->db_mmap_entry = &entry->rdma_entry;
316 
317 	if (!dev->user_dpm_enabled)
318 		uresp.dpm_flags = 0;
319 	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
320 		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
321 	else
322 		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
323 				  QEDR_DPM_TYPE_ROCE_LEGACY |
324 				  QEDR_DPM_TYPE_ROCE_EDPM_MODE;
325 
326 	if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
327 		uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
328 		uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
329 		uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
330 		uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
331 	}
332 
333 	uresp.wids_enabled = 1;
334 	uresp.wid_count = oparams.wid_count;
335 	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
336 	uresp.db_size = ctx->dpi_size;
337 	uresp.max_send_wr = dev->attr.max_sqe;
338 	uresp.max_recv_wr = dev->attr.max_rqe;
339 	uresp.max_srq_wr = dev->attr.max_srq_wr;
340 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
341 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
342 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
343 	uresp.max_cqes = QEDR_MAX_CQES;
344 
345 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
346 	if (rc)
347 		goto err;
348 
349 	ctx->dev = dev;
350 
351 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
352 		 &ctx->ibucontext);
353 	return 0;
354 
355 err:
356 	if (!ctx->db_mmap_entry)
357 		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
358 	else
359 		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
360 
361 	return rc;
362 }
363 
364 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
365 {
366 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
367 
368 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
369 		 uctx);
370 
371 	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
372 }
373 
374 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
375 {
376 	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
377 	struct qedr_dev *dev = entry->dev;
378 
379 	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
380 		free_page((unsigned long)entry->address);
381 	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
382 		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
383 
384 	kfree(entry);
385 }
386 
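/* mmap() handler for the two entry types created by this driver:
 * - QEDR_USER_MMAP_IO_WC: the DPI doorbell window, mapped write-combined.
 * - QEDR_USER_MMAP_PHYS_PAGE: a normal kernel page used for doorbell recovery.
 */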
387 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
388 {
389 	struct ib_device *dev = ucontext->device;
390 	size_t length = vma->vm_end - vma->vm_start;
391 	struct rdma_user_mmap_entry *rdma_entry;
392 	struct qedr_user_mmap_entry *entry;
393 	int rc = 0;
394 	u64 pfn;
395 
396 	ibdev_dbg(dev,
397 		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
398 		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
399 
400 	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
401 	if (!rdma_entry) {
402 		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
403 			  vma->vm_pgoff);
404 		return -EINVAL;
405 	}
406 	entry = get_qedr_mmap_entry(rdma_entry);
407 	ibdev_dbg(dev,
408 		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
409 		  entry->io_address, length, entry->mmap_flag);
410 
411 	switch (entry->mmap_flag) {
412 	case QEDR_USER_MMAP_IO_WC:
413 		pfn = entry->io_address >> PAGE_SHIFT;
414 		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
415 				       pgprot_writecombine(vma->vm_page_prot),
416 				       rdma_entry);
417 		break;
418 	case QEDR_USER_MMAP_PHYS_PAGE:
419 		rc = vm_insert_page(vma, vma->vm_start,
420 				    virt_to_page(entry->address));
421 		break;
422 	default:
423 		rc = -EINVAL;
424 	}
425 
426 	if (rc)
427 		ibdev_dbg(dev,
428 			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
429 			  entry->io_address, length, entry->mmap_flag, rc);
430 
431 	rdma_user_mmap_entry_put(rdma_entry);
432 	return rc;
433 }
434 
435 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
436 {
437 	struct ib_device *ibdev = ibpd->device;
438 	struct qedr_dev *dev = get_qedr_dev(ibdev);
439 	struct qedr_pd *pd = get_qedr_pd(ibpd);
440 	u16 pd_id;
441 	int rc;
442 
443 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
444 		 udata ? "User Lib" : "Kernel");
445 
446 	if (!dev->rdma_ctx) {
447 		DP_ERR(dev, "invalid RDMA context\n");
448 		return -EINVAL;
449 	}
450 
451 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
452 	if (rc)
453 		return rc;
454 
455 	pd->pd_id = pd_id;
456 
457 	if (udata) {
458 		struct qedr_alloc_pd_uresp uresp = {
459 			.pd_id = pd_id,
460 		};
461 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
462 			udata, struct qedr_ucontext, ibucontext);
463 
464 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
465 		if (rc) {
466 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
467 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
468 			return rc;
469 		}
470 
471 		pd->uctx = context;
472 		pd->uctx->pd = pd;
473 	}
474 
475 	return 0;
476 }
477 
478 int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
479 {
480 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
481 	struct qedr_pd *pd = get_qedr_pd(ibpd);
482 
483 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
484 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
485 	return 0;
486 }
487 
489 int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
490 {
491 	struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
492 	struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd);
493 
494 	return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id);
495 }
496 
497 int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
498 {
499 	struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
500 	u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id;
501 
502 	dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id);
503 	return 0;
504 }

505 static void qedr_free_pbl(struct qedr_dev *dev,
506 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
507 {
508 	struct pci_dev *pdev = dev->pdev;
509 	int i;
510 
511 	for (i = 0; i < pbl_info->num_pbls; i++) {
512 		if (!pbl[i].va)
513 			continue;
514 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
515 				  pbl[i].va, pbl[i].pa);
516 	}
517 
518 	kfree(pbl);
519 }
520 
521 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
522 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
523 
524 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
525 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
526 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
527 
528 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
529 					   struct qedr_pbl_info *pbl_info,
530 					   gfp_t flags)
531 {
532 	struct pci_dev *pdev = dev->pdev;
533 	struct qedr_pbl *pbl_table;
534 	dma_addr_t *pbl_main_tbl;
535 	dma_addr_t pa;
536 	void *va;
537 	int i;
538 
539 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
540 	if (!pbl_table)
541 		return ERR_PTR(-ENOMEM);
542 
543 	for (i = 0; i < pbl_info->num_pbls; i++) {
544 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
545 					flags);
546 		if (!va)
547 			goto err;
548 
549 		pbl_table[i].va = va;
550 		pbl_table[i].pa = pa;
551 	}
552 
553 	/* Two-layer PBLs: if we have more than one pbl, initialize the first
554 	 * one with physical pointers to all of the rest.
555 	 */
556 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
557 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
558 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
559 
560 	return pbl_table;
561 
562 err:
563 	for (i--; i >= 0; i--)
564 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
565 				  pbl_table[i].va, pbl_table[i].pa);
566 
567 	qedr_free_pbl(dev, pbl_info, pbl_table);
568 
569 	return ERR_PTR(-ENOMEM);
570 }
571 
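/* Decide between a single- and a two-layer PBL for num_pbes page entries.
 * As an illustration, with the minimum 4K PBL page a single page holds
 * 4096 / 8 = 512 PBEs, so a two-layer PBL at that size can describe up to
 * 512 * 512 = 262144 data pages; the PBL page size is doubled until the
 * two-layer capacity covers num_pbes (bounded by MAX_PBES_TWO_LAYER).
 */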
572 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
573 				struct qedr_pbl_info *pbl_info,
574 				u32 num_pbes, int two_layer_capable)
575 {
576 	u32 pbl_capacity;
577 	u32 pbl_size;
578 	u32 num_pbls;
579 
580 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
581 		if (num_pbes > MAX_PBES_TWO_LAYER) {
582 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
583 			       num_pbes);
584 			return -EINVAL;
585 		}
586 
587 		/* calculate required pbl page size */
588 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
589 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
590 			       NUM_PBES_ON_PAGE(pbl_size);
591 
592 		while (pbl_capacity < num_pbes) {
593 			pbl_size *= 2;
594 			pbl_capacity = pbl_size / sizeof(u64);
595 			pbl_capacity = pbl_capacity * pbl_capacity;
596 		}
597 
598 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
599 		num_pbls++;	/* One for layer 0 (points to the other pbls) */
600 		pbl_info->two_layered = true;
601 	} else {
602 		/* One layered PBL */
603 		num_pbls = 1;
604 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
605 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
606 		pbl_info->two_layered = false;
607 	}
608 
609 	pbl_info->num_pbls = num_pbls;
610 	pbl_info->pbl_size = pbl_size;
611 	pbl_info->num_pbes = num_pbes;
612 
613 	DP_DEBUG(dev, QEDR_MSG_MR,
614 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
615 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
616 
617 	return 0;
618 }
619 
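/* Walk the umem in blocks of (1 << pg_shift) bytes and write each block's DMA
 * address into the PBL as a little-endian lo/hi regpair, advancing to the
 * next PBL page whenever the current one is full. For a two-layer PBL the
 * data entries start at pbl[1], since pbl[0] only points to the other pages.
 */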
620 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
621 			       struct qedr_pbl *pbl,
622 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
623 {
624 	int pbe_cnt, total_num_pbes = 0;
625 	struct qedr_pbl *pbl_tbl;
626 	struct ib_block_iter biter;
627 	struct regpair *pbe;
628 
629 	if (!pbl_info->num_pbes)
630 		return;
631 
632 	/* If we have a two-layered pbl, the first pbl points to the rest of
633 	 * the pbls and the first data entry lies in the second pbl of the table.
634 	 */
635 	if (pbl_info->two_layered)
636 		pbl_tbl = &pbl[1];
637 	else
638 		pbl_tbl = pbl;
639 
640 	pbe = (struct regpair *)pbl_tbl->va;
641 	if (!pbe) {
642 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
643 		return;
644 	}
645 
646 	pbe_cnt = 0;
647 
648 	rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) {
649 		u64 pg_addr = rdma_block_iter_dma_address(&biter);
650 
651 		pbe->lo = cpu_to_le32(pg_addr);
652 		pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
653 
654 		pbe_cnt++;
655 		total_num_pbes++;
656 		pbe++;
657 
658 		if (total_num_pbes == pbl_info->num_pbes)
659 			return;
660 
661 		/* If the current pbl is full of pbes, move to the next pbl.
662 		 */
663 		if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
664 			pbl_tbl++;
665 			pbe = (struct regpair *)pbl_tbl->va;
666 			pbe_cnt = 0;
667 		}
668 	}
669 }
670 
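/* Register a doorbell address together with its shadow data with the qed
 * doorbell-recovery mechanism, so that the last value can be replayed if the
 * HW drops a doorbell. db_data == NULL means the user library predates
 * doorbell recovery, in which case registration is silently skipped.
 */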
671 static int qedr_db_recovery_add(struct qedr_dev *dev,
672 				void __iomem *db_addr,
673 				void *db_data,
674 				enum qed_db_rec_width db_width,
675 				enum qed_db_rec_space db_space)
676 {
677 	if (!db_data) {
678 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
679 		return 0;
680 	}
681 
682 	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
683 						 db_width, db_space);
684 }
685 
686 static void qedr_db_recovery_del(struct qedr_dev *dev,
687 				 void __iomem *db_addr,
688 				 void *db_data)
689 {
690 	if (!db_data) {
691 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
692 		return;
693 	}
694 
695 	/* Ignore return code as there is not much we can do about it. Error
696 	 * log will be printed inside.
697 	 */
698 	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
699 }
700 
701 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
702 			      struct qedr_cq *cq, struct ib_udata *udata,
703 			      u32 db_offset)
704 {
705 	struct qedr_create_cq_uresp uresp;
706 	int rc;
707 
708 	memset(&uresp, 0, sizeof(uresp));
709 
710 	uresp.db_offset = db_offset;
711 	uresp.icid = cq->icid;
712 	if (cq->q.db_mmap_entry)
713 		uresp.db_rec_addr =
714 			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
715 
716 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
717 	if (rc)
718 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
719 
720 	return rc;
721 }
722 
723 static void consume_cqe(struct qedr_cq *cq)
724 {
725 	if (cq->latest_cqe == cq->toggle_cqe)
726 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
727 
728 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
729 }
730 
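/* Round the requested CQE count up so the CQ buffer is page aligned. For
 * example, assuming a 32-byte CQE, a request for 256 entries becomes
 * (256 + 1) * 32 = 8224 bytes, aligned up to 12288, i.e. 384 entries
 * (the "+ 1" is the extra entry that is not reported to the FW).
 */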
731 static inline int qedr_align_cq_entries(int entries)
732 {
733 	u64 size, aligned_size;
734 
735 	/* We allocate an extra entry that we don't report to the FW. */
736 	size = (entries + 1) * QEDR_CQE_SIZE;
737 	aligned_size = ALIGN(size, PAGE_SIZE);
738 
739 	return aligned_size / QEDR_CQE_SIZE;
740 }
741 
742 static int qedr_init_user_db_rec(struct ib_udata *udata,
743 				 struct qedr_dev *dev, struct qedr_userq *q,
744 				 bool requires_db_rec)
745 {
746 	struct qedr_ucontext *uctx =
747 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
748 					  ibucontext);
749 	struct qedr_user_mmap_entry *entry;
750 	int rc;
751 
752 	/* Abort for a non-doorbell user queue (SRQ) or a non-supporting lib */
753 	if (requires_db_rec == 0 || !uctx->db_rec)
754 		return 0;
755 
756 	/* Allocate a page for doorbell recovery, add to mmap */
757 	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
758 	if (!q->db_rec_data) {
759 		DP_ERR(dev, "get_zeroed_page failed\n");
760 		return -ENOMEM;
761 	}
762 
763 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
764 	if (!entry)
765 		goto err_free_db_data;
766 
767 	entry->address = q->db_rec_data;
768 	entry->length = PAGE_SIZE;
769 	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
770 	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
771 					 &entry->rdma_entry,
772 					 PAGE_SIZE);
773 	if (rc)
774 		goto err_free_entry;
775 
776 	q->db_mmap_entry = &entry->rdma_entry;
777 
778 	return 0;
779 
780 err_free_entry:
781 	kfree(entry);
782 
783 err_free_db_data:
784 	free_page((unsigned long)q->db_rec_data);
785 	q->db_rec_data = NULL;
786 	return -ENOMEM;
787 }
788 
789 static inline int qedr_init_user_queue(struct ib_udata *udata,
790 				       struct qedr_dev *dev,
791 				       struct qedr_userq *q, u64 buf_addr,
792 				       size_t buf_len, bool requires_db_rec,
793 				       int access,
794 				       int alloc_and_init)
795 {
796 	u32 fw_pages;
797 	int rc;
798 
799 	q->buf_addr = buf_addr;
800 	q->buf_len = buf_len;
801 	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
802 	if (IS_ERR(q->umem)) {
803 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
804 		       PTR_ERR(q->umem));
805 		return PTR_ERR(q->umem);
806 	}
807 
808 	fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
809 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
810 	if (rc)
811 		goto err0;
812 
813 	if (alloc_and_init) {
814 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
815 		if (IS_ERR(q->pbl_tbl)) {
816 			rc = PTR_ERR(q->pbl_tbl);
817 			goto err0;
818 		}
819 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
820 				   FW_PAGE_SHIFT);
821 	} else {
822 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
823 		if (!q->pbl_tbl) {
824 			rc = -ENOMEM;
825 			goto err0;
826 		}
827 	}
828 
829 	/* mmap the user address used to store doorbell data for recovery */
830 	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
831 
832 err0:
833 	ib_umem_release(q->umem);
834 	q->umem = NULL;
835 
836 	return rc;
837 }
838 
839 static inline void qedr_init_cq_params(struct qedr_cq *cq,
840 				       struct qedr_ucontext *ctx,
841 				       struct qedr_dev *dev, int vector,
842 				       int chain_entries, int page_cnt,
843 				       u64 pbl_ptr,
844 				       struct qed_rdma_create_cq_in_params
845 				       *params)
846 {
847 	memset(params, 0, sizeof(*params));
848 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
849 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
850 	params->cnq_id = vector;
851 	params->cq_size = chain_entries - 1;
852 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
853 	params->pbl_num_pages = page_cnt;
854 	params->pbl_ptr = pbl_ptr;
855 	params->pbl_two_level = 0;
856 }
857 
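/* Ring the CQ doorbell: publish the latest consumer index together with the
 * aggregation/arm flags as a single 64-bit write to the CQ's doorbell address.
 */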
858 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
859 {
860 	cq->db.data.agg_flags = flags;
861 	cq->db.data.value = cpu_to_le32(cons);
862 	writeq(cq->db.raw, cq->db_addr);
863 }
864 
865 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
866 {
867 	struct qedr_cq *cq = get_qedr_cq(ibcq);
868 	unsigned long sflags;
869 	struct qedr_dev *dev;
870 
871 	dev = get_qedr_dev(ibcq->device);
872 
873 	if (cq->destroyed) {
874 		DP_ERR(dev,
875 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
876 		       cq, cq->icid);
877 		return -EINVAL;
878 	}
879 
880 
881 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
882 		return 0;
883 
884 	spin_lock_irqsave(&cq->cq_lock, sflags);
885 
886 	cq->arm_flags = 0;
887 
888 	if (flags & IB_CQ_SOLICITED)
889 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
890 
891 	if (flags & IB_CQ_NEXT_COMP)
892 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
893 
894 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
895 
896 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
897 
898 	return 0;
899 }
900 
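/* Create a CQ. For user CQs the CQE buffer lives in user memory (pinned via
 * ib_umem and described to the FW with a PBL) and the doorbell is an offset
 * into the process' DPI; for kernel CQs the buffer is a qed chain and the
 * doorbell points into the kernel doorbell BAR. In both cases the doorbell is
 * registered with doorbell recovery before the CQ is reported as created.
 */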
901 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
902 		   struct ib_udata *udata)
903 {
904 	struct ib_device *ibdev = ibcq->device;
905 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
906 		udata, struct qedr_ucontext, ibucontext);
907 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
908 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
909 	struct qed_chain_init_params chain_params = {
910 		.mode		= QED_CHAIN_MODE_PBL,
911 		.intended_use	= QED_CHAIN_USE_TO_CONSUME,
912 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
913 		.elem_size	= sizeof(union rdma_cqe),
914 	};
915 	struct qedr_dev *dev = get_qedr_dev(ibdev);
916 	struct qed_rdma_create_cq_in_params params;
917 	struct qedr_create_cq_ureq ureq = {};
918 	int vector = attr->comp_vector;
919 	int entries = attr->cqe;
920 	struct qedr_cq *cq = get_qedr_cq(ibcq);
921 	int chain_entries;
922 	u32 db_offset;
923 	int page_cnt;
924 	u64 pbl_ptr;
925 	u16 icid;
926 	int rc;
927 
928 	DP_DEBUG(dev, QEDR_MSG_INIT,
929 		 "create_cq: called from %s. entries=%d, vector=%d\n",
930 		 udata ? "User Lib" : "Kernel", entries, vector);
931 
932 	if (attr->flags)
933 		return -EOPNOTSUPP;
934 
935 	if (entries > QEDR_MAX_CQES) {
936 		DP_ERR(dev,
937 		       "create cq: the number of entries %d is too high. Must be equal to or below %d.\n",
938 		       entries, QEDR_MAX_CQES);
939 		return -EINVAL;
940 	}
941 
942 	chain_entries = qedr_align_cq_entries(entries);
943 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
944 	chain_params.num_elems = chain_entries;
945 
946 	/* calc db offset. user will add DPI base, kernel will add db addr */
947 	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
948 
949 	if (udata) {
950 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
951 							 udata->inlen))) {
952 			DP_ERR(dev,
953 			       "create cq: problem copying data from user space\n");
954 			goto err0;
955 		}
956 
957 		if (!ureq.len) {
958 			DP_ERR(dev,
959 			       "create cq: cannot create a cq with 0 entries\n");
960 			goto err0;
961 		}
962 
963 		cq->cq_type = QEDR_CQ_TYPE_USER;
964 
965 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
966 					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
967 					  1);
968 		if (rc)
969 			goto err0;
970 
971 		pbl_ptr = cq->q.pbl_tbl->pa;
972 		page_cnt = cq->q.pbl_info.num_pbes;
973 
974 		cq->ibcq.cqe = chain_entries;
975 		cq->q.db_addr = ctx->dpi_addr + db_offset;
976 	} else {
977 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
978 
979 		rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
980 						   &chain_params);
981 		if (rc)
982 			goto err0;
983 
984 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
985 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
986 		cq->ibcq.cqe = cq->pbl.capacity;
987 	}
988 
989 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
990 			    pbl_ptr, &params);
991 
992 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
993 	if (rc)
994 		goto err1;
995 
996 	cq->icid = icid;
997 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
998 	spin_lock_init(&cq->cq_lock);
999 
1000 	if (udata) {
1001 		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
1002 		if (rc)
1003 			goto err2;
1004 
1005 		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
1006 					  &cq->q.db_rec_data->db_data,
1007 					  DB_REC_WIDTH_64B,
1008 					  DB_REC_USER);
1009 		if (rc)
1010 			goto err2;
1011 
1012 	} else {
1013 		/* Generate doorbell address. */
1014 		cq->db.data.icid = cq->icid;
1015 		cq->db_addr = dev->db_addr + db_offset;
1016 		cq->db.data.params = DB_AGG_CMD_MAX <<
1017 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1018 
1019 		/* point to the very last element; once we pass it we toggle */
1020 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1021 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1022 		cq->latest_cqe = NULL;
1023 		consume_cqe(cq);
1024 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1025 
1026 		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1027 					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
1028 		if (rc)
1029 			goto err2;
1030 	}
1031 
1032 	DP_DEBUG(dev, QEDR_MSG_CQ,
1033 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1034 		 cq->icid, cq, params.cq_size);
1035 
1036 	return 0;
1037 
1038 err2:
1039 	destroy_iparams.icid = cq->icid;
1040 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1041 				  &destroy_oparams);
1042 err1:
1043 	if (udata) {
1044 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1045 		ib_umem_release(cq->q.umem);
1046 		if (cq->q.db_mmap_entry)
1047 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1048 	} else {
1049 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1050 	}
1051 err0:
1052 	return -EINVAL;
1053 }
1054 
1055 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1056 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1057 
1058 int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1059 {
1060 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1061 	struct qed_rdma_destroy_cq_out_params oparams;
1062 	struct qed_rdma_destroy_cq_in_params iparams;
1063 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1064 	int iter;
1065 
1066 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1067 
1068 	cq->destroyed = 1;
1069 
1070 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1071 	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1072 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1073 		return 0;
1074 	}
1075 
1076 	iparams.icid = cq->icid;
1077 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1078 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1079 
1080 	if (udata) {
1081 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1082 		ib_umem_release(cq->q.umem);
1083 
1084 		if (cq->q.db_rec_data) {
1085 			qedr_db_recovery_del(dev, cq->q.db_addr,
1086 					     &cq->q.db_rec_data->db_data);
1087 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1088 		}
1089 	} else {
1090 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1091 	}
1092 
1093 	/* We don't want the IRQ handler to handle a non-existing CQ, so we
1094 	 * wait until all CNQ interrupts, if any, have been received. This
1095 	 * always happens, and happens very quickly; if it doesn't, a serious
1096 	 * error has occurred, which is why we can afford a long delay.
1097 	 * We first spin for a short while so we don't lose time on context
1098 	 * switching in case all the completions are handled within that span.
1099 	 * Otherwise we sleep for a while and check again. Since the CNQ may be
1100 	 * associated with (only) the current CPU, we use msleep to allow the
1101 	 * current CPU to be freed.
1102 	 * The CNQ notification count is incremented in qedr_irq_handler().
1103 	 */
1104 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1105 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1106 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1107 		iter--;
1108 	}
1109 
1110 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1111 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1112 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1113 		iter--;
1114 	}
1115 
1116 	/* Note that we don't need to have explicit code to wait for the
1117 	 * completion of the event handler because it is invoked from the EQ.
1118 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1119 	 * be certain that there's no event handler in process.
1120 	 */
1121 	return 0;
1122 }
1123 
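/* Translate the AH's source GID attribute into the qed modify-QP parameters:
 * pick the RoCE mode (v1, v2/IPv4 or v2/IPv6) from the GID's network type,
 * fill in sgid/dgid (or the bare IPv4 addresses), convert the dwords to host
 * byte order and extract the VLAN id, if any.
 */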
1124 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1125 					  struct ib_qp_attr *attr,
1126 					  int attr_mask,
1127 					  struct qed_rdma_modify_qp_in_params
1128 					  *qp_params)
1129 {
1130 	const struct ib_gid_attr *gid_attr;
1131 	enum rdma_network_type nw_type;
1132 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1133 	u32 ipv4_addr;
1134 	int ret;
1135 	int i;
1136 
1137 	gid_attr = grh->sgid_attr;
1138 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1139 	if (ret)
1140 		return ret;
1141 
1142 	nw_type = rdma_gid_attr_network_type(gid_attr);
1143 	switch (nw_type) {
1144 	case RDMA_NETWORK_IPV6:
1145 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1146 		       sizeof(qp_params->sgid));
1147 		memcpy(&qp_params->dgid.bytes[0],
1148 		       &grh->dgid,
1149 		       sizeof(qp_params->dgid));
1150 		qp_params->roce_mode = ROCE_V2_IPV6;
1151 		SET_FIELD(qp_params->modify_flags,
1152 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1153 		break;
1154 	case RDMA_NETWORK_ROCE_V1:
1155 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1156 		       sizeof(qp_params->sgid));
1157 		memcpy(&qp_params->dgid.bytes[0],
1158 		       &grh->dgid,
1159 		       sizeof(qp_params->dgid));
1160 		qp_params->roce_mode = ROCE_V1;
1161 		break;
1162 	case RDMA_NETWORK_IPV4:
1163 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1164 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1165 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1166 		qp_params->sgid.ipv4_addr = ipv4_addr;
1167 		ipv4_addr =
1168 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1169 		qp_params->dgid.ipv4_addr = ipv4_addr;
1170 		SET_FIELD(qp_params->modify_flags,
1171 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1172 		qp_params->roce_mode = ROCE_V2_IPV4;
1173 		break;
1174 	default:
1175 		return -EINVAL;
1176 	}
1177 
1178 	for (i = 0; i < 4; i++) {
1179 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1180 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1181 	}
1182 
1183 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1184 		qp_params->vlan_id = 0;
1185 
1186 	return 0;
1187 }
1188 
1189 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1190 			       struct ib_qp_init_attr *attrs,
1191 			       struct ib_udata *udata)
1192 {
1193 	struct qedr_device_attr *qattr = &dev->attr;
1194 
1195 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1196 	if (attrs->qp_type != IB_QPT_RC &&
1197 	    attrs->qp_type != IB_QPT_GSI &&
1198 	    attrs->qp_type != IB_QPT_XRC_INI &&
1199 	    attrs->qp_type != IB_QPT_XRC_TGT) {
1200 		DP_DEBUG(dev, QEDR_MSG_QP,
1201 			 "create qp: unsupported qp type=0x%x requested\n",
1202 			 attrs->qp_type);
1203 		return -EOPNOTSUPP;
1204 	}
1205 
1206 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1207 		DP_ERR(dev,
1208 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1209 		       attrs->cap.max_send_wr, qattr->max_sqe);
1210 		return -EINVAL;
1211 	}
1212 
1213 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1214 		DP_ERR(dev,
1215 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1216 		       attrs->cap.max_inline_data, qattr->max_inline);
1217 		return -EINVAL;
1218 	}
1219 
1220 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1221 		DP_ERR(dev,
1222 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1223 		       attrs->cap.max_send_sge, qattr->max_sge);
1224 		return -EINVAL;
1225 	}
1226 
1227 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1228 		DP_ERR(dev,
1229 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1230 		       attrs->cap.max_recv_sge, qattr->max_sge);
1231 		return -EINVAL;
1232 	}
1233 
1234 	/* Verify that consumer QPs are not trying to use the GSI QP's CQs.
1235 	 * An XRC TGT QP isn't associated with an RQ/SQ.
1236 	 */
1237 	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
1238 	    (attrs->qp_type != IB_QPT_XRC_TGT) &&
1239 	    (attrs->qp_type != IB_QPT_XRC_INI)) {
1240 		struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
1241 		struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
1242 
1243 		if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) ||
1244 		    (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) {
1245 			DP_ERR(dev,
1246 			       "create qp: consumer QP cannot use GSI CQs.\n");
1247 			return -EINVAL;
1248 		}
1249 	}
1250 
1251 	return 0;
1252 }
1253 
1254 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1255 			       struct qedr_srq *srq, struct ib_udata *udata)
1256 {
1257 	struct qedr_create_srq_uresp uresp = {};
1258 	int rc;
1259 
1260 	uresp.srq_id = srq->srq_id;
1261 
1262 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1263 	if (rc)
1264 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1265 
1266 	return rc;
1267 }
1268 
1269 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1270 			       struct qedr_create_qp_uresp *uresp,
1271 			       struct qedr_qp *qp)
1272 {
1273 	/* iWARP requires two doorbells per RQ. */
1274 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1275 		uresp->rq_db_offset =
1276 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1277 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1278 	} else {
1279 		uresp->rq_db_offset =
1280 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1281 	}
1282 
1283 	uresp->rq_icid = qp->icid;
1284 	if (qp->urq.db_mmap_entry)
1285 		uresp->rq_db_rec_addr =
1286 			rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
1287 }
1288 
1289 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1290 			       struct qedr_create_qp_uresp *uresp,
1291 			       struct qedr_qp *qp)
1292 {
1293 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1294 
1295 	/* iWARP uses the same cid for rq and sq */
1296 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1297 		uresp->sq_icid = qp->icid;
1298 	else
1299 		uresp->sq_icid = qp->icid + 1;
1300 
1301 	if (qp->usq.db_mmap_entry)
1302 		uresp->sq_db_rec_addr =
1303 			rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
1304 }
1305 
1306 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1307 			      struct qedr_qp *qp, struct ib_udata *udata,
1308 			      struct qedr_create_qp_uresp *uresp)
1309 {
1310 	int rc;
1311 
1312 	memset(uresp, 0, sizeof(*uresp));
1313 
1314 	if (qedr_qp_has_sq(qp))
1315 		qedr_copy_sq_uresp(dev, uresp, qp);
1316 
1317 	if (qedr_qp_has_rq(qp))
1318 		qedr_copy_rq_uresp(dev, uresp, qp);
1319 
1320 	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1321 	uresp->qp_id = qp->qp_id;
1322 
1323 	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1324 	if (rc)
1325 		DP_ERR(dev,
1326 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1327 		       qp->icid);
1328 
1329 	return rc;
1330 }
1331 
1332 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1333 {
1334 	qed_chain_reset(&qph->pbl);
1335 	qph->prod = 0;
1336 	qph->cons = 0;
1337 	qph->wqe_cons = 0;
1338 	qph->db_data.data.value = cpu_to_le16(0);
1339 }
1340 
1341 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1342 				      struct qedr_qp *qp,
1343 				      struct qedr_pd *pd,
1344 				      struct ib_qp_init_attr *attrs)
1345 {
1346 	spin_lock_init(&qp->q_lock);
1347 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1348 		kref_init(&qp->refcnt);
1349 		init_completion(&qp->iwarp_cm_comp);
1350 		init_completion(&qp->qp_rel_comp);
1351 	}
1352 
1353 	qp->pd = pd;
1354 	qp->qp_type = attrs->qp_type;
1355 	qp->max_inline_data = attrs->cap.max_inline_data;
1356 	qp->state = QED_ROCE_QP_STATE_RESET;
1357 
1358 	qp->prev_wqe_size = 0;
1359 
1360 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1361 	qp->dev = dev;
1362 	if (qedr_qp_has_sq(qp)) {
1363 		qedr_reset_qp_hwq_info(&qp->sq);
1364 		qp->sq.max_sges = attrs->cap.max_send_sge;
1365 		qp->sq_cq = get_qedr_cq(attrs->send_cq);
1366 		DP_DEBUG(dev, QEDR_MSG_QP,
1367 			 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1368 			 qp->sq.max_sges, qp->sq_cq->icid);
1369 	}
1370 
1371 	if (attrs->srq)
1372 		qp->srq = get_qedr_srq(attrs->srq);
1373 
1374 	if (qedr_qp_has_rq(qp)) {
1375 		qedr_reset_qp_hwq_info(&qp->rq);
1376 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1377 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1378 		DP_DEBUG(dev, QEDR_MSG_QP,
1379 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1380 			 qp->rq.max_sges, qp->rq_cq->icid);
1381 	}
1382 
1383 	DP_DEBUG(dev, QEDR_MSG_QP,
1384 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1385 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1386 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1387 	DP_DEBUG(dev, QEDR_MSG_QP,
1388 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1389 		 qp->sq.max_sges, qp->sq_cq->icid);
1390 }
1391 
1392 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1393 {
1394 	int rc = 0;
1395 
1396 	if (qedr_qp_has_sq(qp)) {
1397 		qp->sq.db = dev->db_addr +
1398 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1399 		qp->sq.db_data.data.icid = qp->icid + 1;
1400 		rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data,
1401 					  DB_REC_WIDTH_32B, DB_REC_KERNEL);
1402 		if (rc)
1403 			return rc;
1404 	}
1405 
1406 	if (qedr_qp_has_rq(qp)) {
1407 		qp->rq.db = dev->db_addr +
1408 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1409 		qp->rq.db_data.data.icid = qp->icid;
1410 		rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data,
1411 					  DB_REC_WIDTH_32B, DB_REC_KERNEL);
1412 		if (rc && qedr_qp_has_sq(qp))
1413 			qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
1414 	}
1415 
1416 	return rc;
1417 }
1418 
1419 static int qedr_check_srq_params(struct qedr_dev *dev,
1420 				 struct ib_srq_init_attr *attrs,
1421 				 struct ib_udata *udata)
1422 {
1423 	struct qedr_device_attr *qattr = &dev->attr;
1424 
1425 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1426 		DP_ERR(dev,
1427 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1428 		       attrs->attr.max_wr, qattr->max_srq_wr);
1429 		return -EINVAL;
1430 	}
1431 
1432 	if (attrs->attr.max_sge > qattr->max_sge) {
1433 		DP_ERR(dev,
1434 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1435 		       attrs->attr.max_sge, qattr->max_sge);
		return -EINVAL;
1436 	}
1437 
1438 	if (!udata && attrs->srq_type == IB_SRQT_XRC) {
1439 		DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n");
1440 		return -EINVAL;
1441 	}
1442 
1443 	return 0;
1444 }
1445 
1446 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1447 {
1448 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1449 	ib_umem_release(srq->usrq.umem);
1450 	ib_umem_release(srq->prod_umem);
1451 }
1452 
1453 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1454 {
1455 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1456 	struct qedr_dev *dev = srq->dev;
1457 
1458 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1459 
1460 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1461 			  hw_srq->virt_prod_pair_addr,
1462 			  hw_srq->phy_prod_pair_addr);
1463 }
1464 
1465 static int qedr_init_srq_user_params(struct ib_udata *udata,
1466 				     struct qedr_srq *srq,
1467 				     struct qedr_create_srq_ureq *ureq,
1468 				     int access)
1469 {
1470 	struct scatterlist *sg;
1471 	int rc;
1472 
1473 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1474 				  ureq->srq_len, false, access, 1);
1475 	if (rc)
1476 		return rc;
1477 
1478 	srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
1479 				     sizeof(struct rdma_srq_producers), access);
1480 	if (IS_ERR(srq->prod_umem)) {
1481 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1482 		ib_umem_release(srq->usrq.umem);
1483 		DP_ERR(srq->dev,
1484 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1485 		       PTR_ERR(srq->prod_umem));
1486 		return PTR_ERR(srq->prod_umem);
1487 	}
1488 
1489 	sg = srq->prod_umem->sgt_append.sgt.sgl;
1490 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1491 
1492 	return 0;
1493 }
1494 
1495 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1496 					struct qedr_dev *dev,
1497 					struct ib_srq_init_attr *init_attr)
1498 {
1499 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1500 	struct qed_chain_init_params params = {
1501 		.mode		= QED_CHAIN_MODE_PBL,
1502 		.intended_use	= QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1503 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1504 		.elem_size	= QEDR_SRQ_WQE_ELEM_SIZE,
1505 	};
1506 	dma_addr_t phy_prod_pair_addr;
1507 	u32 num_elems;
1508 	void *va;
1509 	int rc;
1510 
1511 	va = dma_alloc_coherent(&dev->pdev->dev,
1512 				sizeof(struct rdma_srq_producers),
1513 				&phy_prod_pair_addr, GFP_KERNEL);
1514 	if (!va) {
1515 		DP_ERR(dev,
1516 		       "create srq: failed to allocate dma memory for producer\n");
1517 		return -ENOMEM;
1518 	}
1519 
1520 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1521 	hw_srq->virt_prod_pair_addr = va;
1522 
1523 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1524 	params.num_elems = num_elems;
1525 
1526 	rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
1527 	if (rc)
1528 		goto err0;
1529 
1530 	hw_srq->num_elems = num_elems;
1531 
1532 	return 0;
1533 
1534 err0:
1535 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1536 			  va, phy_prod_pair_addr);
1537 	return rc;
1538 }
1539 
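/* Create an SRQ. The receive-queue buffer is either a user buffer described
 * by a PBL (user path) or a qed chain (kernel path); in both cases a small
 * "producers" structure is shared with the FW through prod_pair_addr, through
 * which the SRQ producer index is communicated. XRC SRQs additionally carry
 * the XRCD id and the attached CQ.
 */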
1540 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1541 		    struct ib_udata *udata)
1542 {
1543 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1544 	struct qed_rdma_create_srq_in_params in_params = {};
1545 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1546 	struct qed_rdma_create_srq_out_params out_params;
1547 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1548 	struct qedr_create_srq_ureq ureq = {};
1549 	u64 pbl_base_addr, phy_prod_pair_addr;
1550 	struct qedr_srq_hwq_info *hw_srq;
1551 	u32 page_cnt, page_size;
1552 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1553 	int rc = 0;
1554 
1555 	DP_DEBUG(dev, QEDR_MSG_QP,
1556 		 "create SRQ called from %s (pd %p)\n",
1557 		 (udata) ? "User lib" : "kernel", pd);
1558 
1559 	if (init_attr->srq_type != IB_SRQT_BASIC &&
1560 	    init_attr->srq_type != IB_SRQT_XRC)
1561 		return -EOPNOTSUPP;
1562 
1563 	rc = qedr_check_srq_params(dev, init_attr, udata);
1564 	if (rc)
1565 		return -EINVAL;
1566 
1567 	srq->dev = dev;
1568 	srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
1569 	hw_srq = &srq->hw_srq;
1570 	spin_lock_init(&srq->lock);
1571 
1572 	hw_srq->max_wr = init_attr->attr.max_wr;
1573 	hw_srq->max_sges = init_attr->attr.max_sge;
1574 
1575 	if (udata) {
1576 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1577 							 udata->inlen))) {
1578 			DP_ERR(dev,
1579 			       "create srq: problem copying data from user space\n");
1580 			goto err0;
1581 		}
1582 
1583 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
1584 		if (rc)
1585 			goto err0;
1586 
1587 		page_cnt = srq->usrq.pbl_info.num_pbes;
1588 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1589 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1590 		page_size = PAGE_SIZE;
1591 	} else {
1592 		struct qed_chain *pbl;
1593 
1594 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1595 		if (rc)
1596 			goto err0;
1597 
1598 		pbl = &hw_srq->pbl;
1599 		page_cnt = qed_chain_get_page_cnt(pbl);
1600 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1601 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1602 		page_size = QED_CHAIN_PAGE_SIZE;
1603 	}
1604 
1605 	in_params.pd_id = pd->pd_id;
1606 	in_params.pbl_base_addr = pbl_base_addr;
1607 	in_params.prod_pair_addr = phy_prod_pair_addr;
1608 	in_params.num_pages = page_cnt;
1609 	in_params.page_size = page_size;
1610 	if (srq->is_xrc) {
1611 		struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd);
1612 		struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq);
1613 
1614 		in_params.is_xrc = 1;
1615 		in_params.xrcd_id = xrcd->xrcd_id;
1616 		in_params.cq_cid = cq->icid;
1617 	}
1618 
1619 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1620 	if (rc)
1621 		goto err1;
1622 
1623 	srq->srq_id = out_params.srq_id;
1624 
1625 	if (udata) {
1626 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1627 		if (rc)
1628 			goto err2;
1629 	}
1630 
1631 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1632 	if (rc)
1633 		goto err2;
1634 
1635 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1636 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1637 	return 0;
1638 
1639 err2:
1640 	destroy_in_params.srq_id = srq->srq_id;
1641 
1642 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1643 err1:
1644 	if (udata)
1645 		qedr_free_srq_user_params(srq);
1646 	else
1647 		qedr_free_srq_kernel_params(srq);
1648 err0:
1649 	return -EFAULT;
1650 }
1651 
1652 int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1653 {
1654 	struct qed_rdma_destroy_srq_in_params in_params = {};
1655 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1656 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1657 
1658 	xa_erase_irq(&dev->srqs, srq->srq_id);
1659 	in_params.srq_id = srq->srq_id;
1660 	in_params.is_xrc = srq->is_xrc;
1661 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1662 
1663 	if (ibsrq->uobject)
1664 		qedr_free_srq_user_params(srq);
1665 	else
1666 		qedr_free_srq_kernel_params(srq);
1667 
1668 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1669 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1670 		 srq->srq_id);
1671 	return 0;
1672 }
1673 
1674 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1675 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1676 {
1677 	struct qed_rdma_modify_srq_in_params in_params = {};
1678 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1679 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1680 	int rc;
1681 
1682 	if (attr_mask & IB_SRQ_MAX_WR) {
1683 		DP_ERR(dev,
1684 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1685 		       attr_mask, srq);
1686 		return -EINVAL;
1687 	}
1688 
1689 	if (attr_mask & IB_SRQ_LIMIT) {
1690 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1691 			DP_ERR(dev,
1692 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1693 			       attr->srq_limit, srq->hw_srq.max_wr);
1694 			return -EINVAL;
1695 		}
1696 
1697 		in_params.srq_id = srq->srq_id;
1698 		in_params.wqe_limit = attr->srq_limit;
1699 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1700 		if (rc)
1701 			return rc;
1702 	}
1703 
1704 	srq->srq_limit = attr->srq_limit;
1705 
1706 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1707 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1708 
1709 	return 0;
1710 }
1711 
1712 static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type)
1713 {
1714 	switch (ib_qp_type) {
1715 	case IB_QPT_RC:
1716 		return QED_RDMA_QP_TYPE_RC;
1717 	case IB_QPT_XRC_INI:
1718 		return QED_RDMA_QP_TYPE_XRC_INI;
1719 	case IB_QPT_XRC_TGT:
1720 		return QED_RDMA_QP_TYPE_XRC_TGT;
1721 	default:
1722 		return QED_RDMA_QP_TYPE_INVAL;
1723 	}
1724 }
1725 
1726 static inline void
1727 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1728 			      struct qedr_pd *pd,
1729 			      struct qedr_qp *qp,
1730 			      struct ib_qp_init_attr *attrs,
1731 			      bool fmr_and_reserved_lkey,
1732 			      struct qed_rdma_create_qp_in_params *params)
1733 {
1734 	/* QP handle to be written in an async event */
1735 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1736 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1737 
1738 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1739 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1740 	params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type);
1741 	params->stats_queue = 0;
1742 
1743 	if (pd) {
1744 		params->pd = pd->pd_id;
1745 		params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1746 	}
1747 
1748 	if (qedr_qp_has_sq(qp))
1749 		params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1750 
1751 	if (qedr_qp_has_rq(qp))
1752 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1753 
1754 	if (qedr_qp_has_srq(qp)) {
1755 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1756 		params->srq_id = qp->srq->srq_id;
1757 		params->use_srq = true;
1758 	} else {
1759 		params->srq_id = 0;
1760 		params->use_srq = false;
1761 	}
1762 }
1763 
1764 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1765 {
1766 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1767 		 "qp=%p. "
1768 		 "sq_addr=0x%llx, "
1769 		 "sq_len=%zd, "
1770 		 "rq_addr=0x%llx, "
1771 		 "rq_len=%zd"
1772 		 "\n",
1773 		 qp,
1774 		 qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0,
1775 		 qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0,
1776 		 qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0,
1777 		 qedr_qp_has_rq(qp) ? qp->urq.buf_len : 0);
1778 }
1779 
1780 static inline void
1781 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1782 			    struct qedr_qp *qp,
1783 			    struct qed_rdma_create_qp_out_params *out_params)
1784 {
1785 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1786 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1787 
1788 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1789 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1790 	if (!qp->srq) {
1791 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1792 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1793 	}
1794 
1795 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1796 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1797 }
1798 
1799 static void qedr_cleanup_user(struct qedr_dev *dev,
1800 			      struct qedr_ucontext *ctx,
1801 			      struct qedr_qp *qp)
1802 {
1803 	if (qedr_qp_has_sq(qp)) {
1804 		ib_umem_release(qp->usq.umem);
1805 		qp->usq.umem = NULL;
1806 	}
1807 
1808 	if (qedr_qp_has_rq(qp)) {
1809 		ib_umem_release(qp->urq.umem);
1810 		qp->urq.umem = NULL;
1811 	}
1812 
1813 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1814 		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1815 		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1816 	} else {
1817 		kfree(qp->usq.pbl_tbl);
1818 		kfree(qp->urq.pbl_tbl);
1819 	}
1820 
1821 	if (qp->usq.db_rec_data) {
1822 		qedr_db_recovery_del(dev, qp->usq.db_addr,
1823 				     &qp->usq.db_rec_data->db_data);
1824 		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1825 	}
1826 
1827 	if (qp->urq.db_rec_data) {
1828 		qedr_db_recovery_del(dev, qp->urq.db_addr,
1829 				     &qp->urq.db_rec_data->db_data);
1830 		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
1831 	}
1832 
1833 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1834 		qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
1835 				     &qp->urq.db_rec_db2_data);
1836 }
1837 
1838 static int qedr_create_user_qp(struct qedr_dev *dev,
1839 			       struct qedr_qp *qp,
1840 			       struct ib_pd *ibpd,
1841 			       struct ib_udata *udata,
1842 			       struct ib_qp_init_attr *attrs)
1843 {
1844 	struct qed_rdma_create_qp_in_params in_params;
1845 	struct qed_rdma_create_qp_out_params out_params;
1846 	struct qedr_create_qp_uresp uresp = {};
1847 	struct qedr_create_qp_ureq ureq = {};
1848 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1849 	struct qedr_ucontext *ctx = NULL;
1850 	struct qedr_pd *pd = NULL;
1851 	int rc = 0;
1852 
1853 	qp->create_type = QEDR_QP_CREATE_USER;
1854 
1855 	if (ibpd) {
1856 		pd = get_qedr_pd(ibpd);
1857 		ctx = pd->uctx;
1858 	}
1859 
1860 	if (udata) {
1861 		rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1862 					udata->inlen));
1863 		if (rc) {
1864 			DP_ERR(dev, "Problem copying data from user space\n");
1865 			return rc;
1866 		}
1867 	}
1868 
1869 	if (qedr_qp_has_sq(qp)) {
1870 		/* SQ - read access only (0) */
1871 		rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1872 					  ureq.sq_len, true, 0, alloc_and_init);
1873 		if (rc)
1874 			return rc;
1875 	}
1876 
1877 	if (qedr_qp_has_rq(qp)) {
1878 		/* RQ - read access only (0) */
1879 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1880 					  ureq.rq_len, true, 0, alloc_and_init);
1881 		if (rc)
1882 			return rc;
1883 	}
1884 
1885 	memset(&in_params, 0, sizeof(in_params));
1886 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1887 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1888 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1889 
1890 	if (qp->qp_type == IB_QPT_XRC_TGT) {
1891 		struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd);
1892 
1893 		in_params.xrcd_id = xrcd->xrcd_id;
1894 		in_params.qp_handle_lo = qp->qp_id;
1895 		in_params.use_srq = 1;
1896 	}
1897 
1898 	if (qedr_qp_has_sq(qp)) {
1899 		in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1900 		in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1901 	}
1902 
1903 	if (qedr_qp_has_rq(qp)) {
1904 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1905 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1906 	}
1907 
1908 	if (ctx)
1909 		SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
1910 
1911 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1912 					      &in_params, &out_params);
1913 
1914 	if (!qp->qed_qp) {
1915 		rc = -ENOMEM;
1916 		goto err1;
1917 	}
1918 
1919 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1920 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1921 
1922 	qp->qp_id = out_params.qp_id;
1923 	qp->icid = out_params.icid;
1924 
1925 	if (udata) {
1926 		rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1927 		if (rc)
1928 			goto err;
1929 	}
1930 
1931 	/* The db offset was calculated in qedr_copy_qp_uresp(); now set it in the user queues */
1932 	if (qedr_qp_has_sq(qp)) {
1933 		qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1934 		qp->sq.max_wr = attrs->cap.max_send_wr;
1935 		rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1936 					  &qp->usq.db_rec_data->db_data,
1937 					  DB_REC_WIDTH_32B,
1938 					  DB_REC_USER);
1939 		if (rc)
1940 			goto err;
1941 	}
1942 
1943 	if (qedr_qp_has_rq(qp)) {
1944 		qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1945 		qp->rq.max_wr = attrs->cap.max_recv_wr;
1946 		rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1947 					  &qp->urq.db_rec_data->db_data,
1948 					  DB_REC_WIDTH_32B,
1949 					  DB_REC_USER);
1950 		if (rc)
1951 			goto err;
1952 	}
1953 
1954 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1955 		qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
1956 
1957 		/* Calculate the db_rec_db2 data here since it is constant, so
1958 		 * there is no need to reflect it from user space
1959 		 */
1960 		qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
1961 		qp->urq.db_rec_db2_data.data.value =
1962 			cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
1963 
1964 		rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1965 					  &qp->urq.db_rec_db2_data,
1966 					  DB_REC_WIDTH_32B,
1967 					  DB_REC_USER);
1968 		if (rc)
1969 			goto err;
1970 	}
1971 	qedr_qp_user_print(dev, qp);
1972 	return rc;
1973 err:
1974 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1975 	if (rc)
1976 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1977 
1978 err1:
1979 	qedr_cleanup_user(dev, ctx, qp);
1980 	return rc;
1981 }
1982 
1983 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1984 {
1985 	int rc;
1986 
1987 	qp->sq.db = dev->db_addr +
1988 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1989 	qp->sq.db_data.data.icid = qp->icid;
1990 
1991 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1992 				  &qp->sq.db_data,
1993 				  DB_REC_WIDTH_32B,
1994 				  DB_REC_KERNEL);
1995 	if (rc)
1996 		return rc;
1997 
1998 	qp->rq.db = dev->db_addr +
1999 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
2000 	qp->rq.db_data.data.icid = qp->icid;
2001 	qp->rq.iwarp_db2 = dev->db_addr +
2002 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
2003 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
2004 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
2005 
2006 	rc = qedr_db_recovery_add(dev, qp->rq.db,
2007 				  &qp->rq.db_data,
2008 				  DB_REC_WIDTH_32B,
2009 				  DB_REC_KERNEL);
2010 	if (rc)
2011 		return rc;
2012 
2013 	rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
2014 				  &qp->rq.iwarp_db2_data,
2015 				  DB_REC_WIDTH_32B,
2016 				  DB_REC_KERNEL);
2017 	return rc;
2018 }
2019 
2020 static int
2021 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
2022 			   struct qedr_qp *qp,
2023 			   struct qed_rdma_create_qp_in_params *in_params,
2024 			   u32 n_sq_elems, u32 n_rq_elems)
2025 {
2026 	struct qed_rdma_create_qp_out_params out_params;
2027 	struct qed_chain_init_params params = {
2028 		.mode		= QED_CHAIN_MODE_PBL,
2029 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
2030 	};
2031 	int rc;
2032 
2033 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
2034 	params.num_elems = n_sq_elems;
2035 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
2036 
2037 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
2038 	if (rc)
2039 		return rc;
2040 
2041 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
2042 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
2043 
2044 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
2045 	params.num_elems = n_rq_elems;
2046 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
2047 
2048 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2049 	if (rc)
2050 		return rc;
2051 
2052 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
2053 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
2054 
2055 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
2056 					      in_params, &out_params);
2057 
2058 	if (!qp->qed_qp)
2059 		return -EINVAL;
2060 
2061 	qp->qp_id = out_params.qp_id;
2062 	qp->icid = out_params.icid;
2063 
2064 	return qedr_set_roce_db_info(dev, qp);
2065 }
2066 
2067 static int
2068 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
2069 			    struct qedr_qp *qp,
2070 			    struct qed_rdma_create_qp_in_params *in_params,
2071 			    u32 n_sq_elems, u32 n_rq_elems)
2072 {
2073 	struct qed_rdma_create_qp_out_params out_params;
2074 	struct qed_chain_init_params params = {
2075 		.mode		= QED_CHAIN_MODE_PBL,
2076 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
2077 	};
2078 	int rc;
2079 
2080 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
2081 						     QEDR_SQE_ELEMENT_SIZE,
2082 						     QED_CHAIN_PAGE_SIZE,
2083 						     QED_CHAIN_MODE_PBL);
2084 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
2085 						     QEDR_RQE_ELEMENT_SIZE,
2086 						     QED_CHAIN_PAGE_SIZE,
2087 						     QED_CHAIN_MODE_PBL);
2088 
2089 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
2090 					      in_params, &out_params);
2091 
2092 	if (!qp->qed_qp)
2093 		return -EINVAL;
2094 
2095 	/* Now we allocate the chain */
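	/* Unlike the RoCE path, which allocates the SQ/RQ chains first and
	 * passes their PBL addresses into rdma_create_qp(), the iWARP path
	 * creates the QP first and then builds the chains on top of the PBL
	 * memory qed returned in out_params (ext_pbl_virt/ext_pbl_phys).
	 */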
2096 
2097 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
2098 	params.num_elems = n_sq_elems;
2099 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
2100 	params.ext_pbl_virt = out_params.sq_pbl_virt;
2101 	params.ext_pbl_phys = out_params.sq_pbl_phys;
2102 
2103 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
2104 	if (rc)
2105 		goto err;
2106 
2107 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
2108 	params.num_elems = n_rq_elems;
2109 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
2110 	params.ext_pbl_virt = out_params.rq_pbl_virt;
2111 	params.ext_pbl_phys = out_params.rq_pbl_phys;
2112 
2113 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2114 	if (rc)
2115 		goto err;
2116 
2117 	qp->qp_id = out_params.qp_id;
2118 	qp->icid = out_params.icid;
2119 
2120 	return qedr_set_iwarp_db_info(dev, qp);
2121 
2122 err:
2123 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2124 
2125 	return rc;
2126 }
2127 
2128 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2129 {
2130 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2131 	kfree(qp->wqe_wr_id);
2132 
2133 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2134 	kfree(qp->rqe_wr_id);
2135 
2136 	/* The GSI QP is not registered with the db recovery mechanism, so nothing to delete */
2137 	if (qp->qp_type == IB_QPT_GSI)
2138 		return;
2139 
2140 	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2141 
2142 	if (!qp->srq) {
2143 		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2144 
2145 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2146 			qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2147 					     &qp->rq.iwarp_db2_data);
2148 	}
2149 }
2150 
2151 static int qedr_create_kernel_qp(struct qedr_dev *dev,
2152 				 struct qedr_qp *qp,
2153 				 struct ib_pd *ibpd,
2154 				 struct ib_qp_init_attr *attrs)
2155 {
2156 	struct qed_rdma_create_qp_in_params in_params;
2157 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2158 	int rc = -EINVAL;
2159 	u32 n_rq_elems;
2160 	u32 n_sq_elems;
2161 	u32 n_sq_entries;
2162 
2163 	memset(&in_params, 0, sizeof(in_params));
2164 	qp->create_type = QEDR_QP_CREATE_KERNEL;
2165 
2166 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2167 	 * the ring. The ring should allow at least a single WR, even if the
2168 	 * user requested none, due to allocation issues.
2169 	 * We should add an extra WR since the prod and cons indices of
2170 	 * wqe_wr_id are managed in such a way that the WQ is considered full
2171 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
2172 	 * double the number of entries due to an iSER issue that pushes far more
2173 	 * WRs than indicated. If we decline its ib_post_send() then we get
2174 	 * error prints in the dmesg we'd like to avoid.
2175 	 */
2176 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2177 			      dev->attr.max_sqe);
2178 
2179 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2180 				GFP_KERNEL);
2181 	if (!qp->wqe_wr_id) {
2182 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2183 		return -ENOMEM;
2184 	}
2185 
2186 	/* QP handle to be written in CQE */
2187 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2188 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2189 
2190 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2191 	 * the ring. The ring should allow at least a single WR, even if the
2192 	 * user requested none, due to allocation issues.
2193 	 */
2194 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2195 
2196 	/* Allocate driver internal RQ array */
2197 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2198 				GFP_KERNEL);
2199 	if (!qp->rqe_wr_id) {
2200 		DP_ERR(dev,
2201 		       "create qp: failed RQ shadow memory allocation\n");
2202 		kfree(qp->wqe_wr_id);
2203 		return -ENOMEM;
2204 	}
2205 
2206 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2207 
2208 	n_sq_entries = attrs->cap.max_send_wr;
2209 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2210 	n_sq_entries = max_t(u32, n_sq_entries, 1);
2211 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2212 
2213 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2214 
2215 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2216 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2217 						 n_sq_elems, n_rq_elems);
2218 	else
2219 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2220 						n_sq_elems, n_rq_elems);
2221 	if (rc)
2222 		qedr_cleanup_kernel(dev, qp);
2223 
2224 	return rc;
2225 }
2226 
2227 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2228 				  struct ib_udata *udata)
2229 {
2230 	struct qedr_ucontext *ctx =
2231 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2232 					  ibucontext);
2233 	int rc;
2234 
2235 	if (qp->qp_type != IB_QPT_GSI) {
2236 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2237 		if (rc)
2238 			return rc;
2239 	}
2240 
2241 	if (qp->create_type == QEDR_QP_CREATE_USER)
2242 		qedr_cleanup_user(dev, ctx, qp);
2243 	else
2244 		qedr_cleanup_kernel(dev, qp);
2245 
2246 	return 0;
2247 }
2248 
2249 int qedr_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
2250 		   struct ib_udata *udata)
2251 {
2252 	struct qedr_xrcd *xrcd = NULL;
2253 	struct ib_pd *ibpd = ibqp->pd;
2254 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2255 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2256 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2257 	int rc = 0;
2258 
2259 	if (attrs->create_flags)
2260 		return -EOPNOTSUPP;
2261 
2262 	if (attrs->qp_type == IB_QPT_XRC_TGT)
2263 		xrcd = get_qedr_xrcd(attrs->xrcd);
2264 	else
2265 		pd = get_qedr_pd(ibpd);
2266 
2267 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2268 		 udata ? "user library" : "kernel", pd);
2269 
2270 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2271 	if (rc)
2272 		return rc;
2273 
2274 	DP_DEBUG(dev, QEDR_MSG_QP,
2275 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2276 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2277 		 get_qedr_cq(attrs->send_cq),
2278 		 get_qedr_cq(attrs->send_cq)->icid,
2279 		 get_qedr_cq(attrs->recv_cq),
2280 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2281 
2282 	qedr_set_common_qp_params(dev, qp, pd, attrs);
2283 
2284 	if (attrs->qp_type == IB_QPT_GSI)
2285 		return qedr_create_gsi_qp(dev, attrs, qp);
2286 
2287 	if (udata || xrcd)
2288 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2289 	else
2290 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2291 
2292 	if (rc)
2293 		return rc;
2294 
2295 	qp->ibqp.qp_num = qp->qp_id;
2296 
2297 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2298 		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2299 		if (rc)
2300 			goto out_free_qp_resources;
2301 	}
2302 
2303 	return 0;
2304 
2305 out_free_qp_resources:
2306 	qedr_free_qp_resources(dev, qp, udata);
2307 	return -EFAULT;
2308 }
2309 
2310 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2311 {
2312 	switch (qp_state) {
2313 	case QED_ROCE_QP_STATE_RESET:
2314 		return IB_QPS_RESET;
2315 	case QED_ROCE_QP_STATE_INIT:
2316 		return IB_QPS_INIT;
2317 	case QED_ROCE_QP_STATE_RTR:
2318 		return IB_QPS_RTR;
2319 	case QED_ROCE_QP_STATE_RTS:
2320 		return IB_QPS_RTS;
2321 	case QED_ROCE_QP_STATE_SQD:
2322 		return IB_QPS_SQD;
2323 	case QED_ROCE_QP_STATE_ERR:
2324 		return IB_QPS_ERR;
2325 	case QED_ROCE_QP_STATE_SQE:
2326 		return IB_QPS_SQE;
2327 	}
2328 	return IB_QPS_ERR;
2329 }
2330 
2331 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2332 					enum ib_qp_state qp_state)
2333 {
2334 	switch (qp_state) {
2335 	case IB_QPS_RESET:
2336 		return QED_ROCE_QP_STATE_RESET;
2337 	case IB_QPS_INIT:
2338 		return QED_ROCE_QP_STATE_INIT;
2339 	case IB_QPS_RTR:
2340 		return QED_ROCE_QP_STATE_RTR;
2341 	case IB_QPS_RTS:
2342 		return QED_ROCE_QP_STATE_RTS;
2343 	case IB_QPS_SQD:
2344 		return QED_ROCE_QP_STATE_SQD;
2345 	case IB_QPS_ERR:
2346 		return QED_ROCE_QP_STATE_ERR;
2347 	default:
2348 		return QED_ROCE_QP_STATE_ERR;
2349 	}
2350 }
2351 
2352 static int qedr_update_qp_state(struct qedr_dev *dev,
2353 				struct qedr_qp *qp,
2354 				enum qed_roce_qp_state cur_state,
2355 				enum qed_roce_qp_state new_state)
2356 {
2357 	int status = 0;
2358 
2359 	if (new_state == cur_state)
2360 		return 0;
2361 
2362 	switch (cur_state) {
2363 	case QED_ROCE_QP_STATE_RESET:
2364 		switch (new_state) {
2365 		case QED_ROCE_QP_STATE_INIT:
2366 			break;
2367 		default:
2368 			status = -EINVAL;
2369 			break;
2370 		}
2371 		break;
2372 	case QED_ROCE_QP_STATE_INIT:
2373 		switch (new_state) {
2374 		case QED_ROCE_QP_STATE_RTR:
2375 			/* Update doorbell (in case post_recv was
2376 			 * done before move to RTR)
2377 			 */
2378 
2379 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2380 				writel(qp->rq.db_data.raw, qp->rq.db);
2381 			}
2382 			break;
2383 		case QED_ROCE_QP_STATE_ERR:
2384 			break;
2385 		default:
2386 			/* Invalid state change. */
2387 			status = -EINVAL;
2388 			break;
2389 		}
2390 		break;
2391 	case QED_ROCE_QP_STATE_RTR:
2392 		/* RTR->XXX */
2393 		switch (new_state) {
2394 		case QED_ROCE_QP_STATE_RTS:
2395 			break;
2396 		case QED_ROCE_QP_STATE_ERR:
2397 			break;
2398 		default:
2399 			/* Invalid state change. */
2400 			status = -EINVAL;
2401 			break;
2402 		}
2403 		break;
2404 	case QED_ROCE_QP_STATE_RTS:
2405 		/* RTS->XXX */
2406 		switch (new_state) {
2407 		case QED_ROCE_QP_STATE_SQD:
2408 			break;
2409 		case QED_ROCE_QP_STATE_ERR:
2410 			break;
2411 		default:
2412 			/* Invalid state change. */
2413 			status = -EINVAL;
2414 			break;
2415 		}
2416 		break;
2417 	case QED_ROCE_QP_STATE_SQD:
2418 		/* SQD->XXX */
2419 		switch (new_state) {
2420 		case QED_ROCE_QP_STATE_RTS:
2421 		case QED_ROCE_QP_STATE_ERR:
2422 			break;
2423 		default:
2424 			/* Invalid state change. */
2425 			status = -EINVAL;
2426 			break;
2427 		}
2428 		break;
2429 	case QED_ROCE_QP_STATE_ERR:
2430 		/* ERR->XXX */
2431 		switch (new_state) {
2432 		case QED_ROCE_QP_STATE_RESET:
2433 			if ((qp->rq.prod != qp->rq.cons) ||
2434 			    (qp->sq.prod != qp->sq.cons)) {
2435 				DP_NOTICE(dev,
2436 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2437 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2438 					  qp->sq.cons);
2439 				status = -EINVAL;
2440 			}
2441 			break;
2442 		default:
2443 			status = -EINVAL;
2444 			break;
2445 		}
2446 		break;
2447 	default:
2448 		status = -EINVAL;
2449 		break;
2450 	}
2451 
2452 	return status;
2453 }
2454 
2455 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2456 		   int attr_mask, struct ib_udata *udata)
2457 {
2458 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2459 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2460 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2461 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2462 	enum ib_qp_state old_qp_state, new_qp_state;
2463 	enum qed_roce_qp_state cur_state;
2464 	int rc = 0;
2465 
2466 	DP_DEBUG(dev, QEDR_MSG_QP,
2467 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2468 		 attr->qp_state);
2469 
2470 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
2471 		return -EOPNOTSUPP;
2472 
2473 	old_qp_state = qedr_get_ibqp_state(qp->state);
2474 	if (attr_mask & IB_QP_STATE)
2475 		new_qp_state = attr->qp_state;
2476 	else
2477 		new_qp_state = old_qp_state;
2478 
2479 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2480 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2481 					ibqp->qp_type, attr_mask)) {
2482 			DP_ERR(dev,
2483 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2484 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2485 			       attr_mask, qp->qp_id, ibqp->qp_type,
2486 			       old_qp_state, new_qp_state);
2487 			rc = -EINVAL;
2488 			goto err;
2489 		}
2490 	}
2491 
2492 	/* Translate the masks... */
2493 	if (attr_mask & IB_QP_STATE) {
2494 		SET_FIELD(qp_params.modify_flags,
2495 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2496 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2497 	}
2498 
2499 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2500 		qp_params.sqd_async = true;
2501 
2502 	if (attr_mask & IB_QP_PKEY_INDEX) {
2503 		SET_FIELD(qp_params.modify_flags,
2504 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2505 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2506 			rc = -EINVAL;
2507 			goto err;
2508 		}
2509 
2510 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2511 	}
2512 
2513 	if (attr_mask & IB_QP_QKEY)
2514 		qp->qkey = attr->qkey;
2515 
2516 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2517 		SET_FIELD(qp_params.modify_flags,
2518 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2519 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2520 						  IB_ACCESS_REMOTE_READ;
2521 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2522 						   IB_ACCESS_REMOTE_WRITE;
2523 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2524 					       IB_ACCESS_REMOTE_ATOMIC;
2525 	}
2526 
2527 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2528 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2529 			return -EINVAL;
2530 
2531 		if (attr_mask & IB_QP_PATH_MTU) {
2532 			if (attr->path_mtu < IB_MTU_256 ||
2533 			    attr->path_mtu > IB_MTU_4096) {
2534 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2535 				rc = -EINVAL;
2536 				goto err;
2537 			}
2538 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2539 				      ib_mtu_enum_to_int(iboe_get_mtu
2540 							 (dev->ndev->mtu)));
2541 		}
2542 
2543 		if (!qp->mtu) {
2544 			qp->mtu =
2545 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2546 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2547 		}
2548 
2549 		SET_FIELD(qp_params.modify_flags,
2550 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2551 
2552 		qp_params.traffic_class_tos = grh->traffic_class;
2553 		qp_params.flow_label = grh->flow_label;
2554 		qp_params.hop_limit_ttl = grh->hop_limit;
2555 
2556 		qp->sgid_idx = grh->sgid_index;
2557 
2558 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2559 		if (rc) {
2560 			DP_ERR(dev,
2561 			       "modify qp: problems with GID index %d (rc=%d)\n",
2562 			       grh->sgid_index, rc);
2563 			return rc;
2564 		}
2565 
2566 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2567 				   qp_params.remote_mac_addr);
2568 		if (rc)
2569 			return rc;
2570 
2571 		qp_params.use_local_mac = true;
2572 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2573 
2574 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2575 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2576 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2577 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2578 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2579 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2580 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2581 			 qp_params.remote_mac_addr);
2582 
2583 		qp_params.mtu = qp->mtu;
2584 		qp_params.lb_indication = false;
2585 	}
2586 
2587 	if (!qp_params.mtu) {
2588 		/* Stay with current MTU */
2589 		if (qp->mtu)
2590 			qp_params.mtu = qp->mtu;
2591 		else
2592 			qp_params.mtu =
2593 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2594 	}
2595 
2596 	if (attr_mask & IB_QP_TIMEOUT) {
2597 		SET_FIELD(qp_params.modify_flags,
2598 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2599 
2600 		/* The received timeout value is an exponent used like this:
2601 		 *    "12.7.34 LOCAL ACK TIMEOUT
2602 		 *    Value representing the transport (ACK) timeout for use by
2603 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2604 		 * The FW expects timeout in msec so we need to divide the usec
2605 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2606 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2607 		 * The value of zero means infinite so we use a 'max_t' to make
2608 		 * sure that sub 1 msec values will be configured as 1 msec.
2609 		 */
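		/* Worked example (illustrative values): attr->timeout = 14
		 * means 4.096 * 2^14 usec ~= 67 msec per the spec, while the
		 * approximation below gives 1 << (14 - 8) = 64 msec - close
		 * enough given the FW's msec granularity.
		 */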
2610 		if (attr->timeout)
2611 			qp_params.ack_timeout =
2612 					1 << max_t(int, attr->timeout - 8, 0);
2613 		else
2614 			qp_params.ack_timeout = 0;
2615 	}
2616 
2617 	if (attr_mask & IB_QP_RETRY_CNT) {
2618 		SET_FIELD(qp_params.modify_flags,
2619 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2620 		qp_params.retry_cnt = attr->retry_cnt;
2621 	}
2622 
2623 	if (attr_mask & IB_QP_RNR_RETRY) {
2624 		SET_FIELD(qp_params.modify_flags,
2625 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2626 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2627 	}
2628 
2629 	if (attr_mask & IB_QP_RQ_PSN) {
2630 		SET_FIELD(qp_params.modify_flags,
2631 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2632 		qp_params.rq_psn = attr->rq_psn;
2633 		qp->rq_psn = attr->rq_psn;
2634 	}
2635 
2636 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2637 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2638 			rc = -EINVAL;
2639 			DP_ERR(dev,
2640 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2641 			       attr->max_rd_atomic,
2642 			       dev->attr.max_qp_req_rd_atomic_resc);
2643 			goto err;
2644 		}
2645 
2646 		SET_FIELD(qp_params.modify_flags,
2647 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2648 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2649 	}
2650 
2651 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2652 		SET_FIELD(qp_params.modify_flags,
2653 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2654 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2655 	}
2656 
2657 	if (attr_mask & IB_QP_SQ_PSN) {
2658 		SET_FIELD(qp_params.modify_flags,
2659 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2660 		qp_params.sq_psn = attr->sq_psn;
2661 		qp->sq_psn = attr->sq_psn;
2662 	}
2663 
2664 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2665 		if (attr->max_dest_rd_atomic >
2666 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2667 			DP_ERR(dev,
2668 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2669 			       attr->max_dest_rd_atomic,
2670 			       dev->attr.max_qp_resp_rd_atomic_resc);
2671 
2672 			rc = -EINVAL;
2673 			goto err;
2674 		}
2675 
2676 		SET_FIELD(qp_params.modify_flags,
2677 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2678 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2679 	}
2680 
2681 	if (attr_mask & IB_QP_DEST_QPN) {
2682 		SET_FIELD(qp_params.modify_flags,
2683 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2684 
2685 		qp_params.dest_qp = attr->dest_qp_num;
2686 		qp->dest_qp_num = attr->dest_qp_num;
2687 	}
2688 
2689 	cur_state = qp->state;
2690 
2691 	/* Update the QP state before the actual ramrod to prevent a race with
2692 	 * fast path. Modifying the QP state to error will cause the device to
2693 	 * flush the CQEs, and while polling, the flushed CQEs will be considered
2694 	 * a potential issue if the QP isn't in the error state.
2695 	 */
2696 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2697 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2698 		qp->state = QED_ROCE_QP_STATE_ERR;
2699 
2700 	if (qp->qp_type != IB_QPT_GSI)
2701 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2702 					      qp->qed_qp, &qp_params);
2703 
2704 	if (attr_mask & IB_QP_STATE) {
2705 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2706 			rc = qedr_update_qp_state(dev, qp, cur_state,
2707 						  qp_params.new_state);
2708 		qp->state = qp_params.new_state;
2709 	}
2710 
2711 err:
2712 	return rc;
2713 }
2714 
2715 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2716 {
2717 	int ib_qp_acc_flags = 0;
2718 
2719 	if (params->incoming_rdma_write_en)
2720 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2721 	if (params->incoming_rdma_read_en)
2722 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2723 	if (params->incoming_atomic_en)
2724 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2725 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2726 	return ib_qp_acc_flags;
2727 }
2728 
2729 int qedr_query_qp(struct ib_qp *ibqp,
2730 		  struct ib_qp_attr *qp_attr,
2731 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2732 {
2733 	struct qed_rdma_query_qp_out_params params;
2734 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2735 	struct qedr_dev *dev = qp->dev;
2736 	int rc = 0;
2737 
2738 	memset(&params, 0, sizeof(params));
2739 	memset(qp_attr, 0, sizeof(*qp_attr));
2740 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2741 
2742 	if (qp->qp_type != IB_QPT_GSI) {
2743 		rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2744 		if (rc)
2745 			goto err;
2746 		qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2747 	} else {
2748 		qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS);
2749 	}
2750 
2751 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2752 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2753 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2754 	qp_attr->rq_psn = params.rq_psn;
2755 	qp_attr->sq_psn = params.sq_psn;
2756 	qp_attr->dest_qp_num = params.dest_qp;
2757 
2758 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2759 
2760 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2761 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2762 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2763 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2764 	qp_attr->cap.max_inline_data = dev->attr.max_inline;
2765 	qp_init_attr->cap = qp_attr->cap;
2766 
2767 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2768 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2769 			params.flow_label, qp->sgid_idx,
2770 			params.hop_limit_ttl, params.traffic_class_tos);
2771 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2772 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2773 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2774 	qp_attr->timeout = params.timeout;
2775 	qp_attr->rnr_retry = params.rnr_retry;
2776 	qp_attr->retry_cnt = params.retry_cnt;
2777 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2778 	qp_attr->pkey_index = params.pkey_index;
2779 	qp_attr->port_num = 1;
2780 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2781 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2782 	qp_attr->alt_pkey_index = 0;
2783 	qp_attr->alt_port_num = 0;
2784 	qp_attr->alt_timeout = 0;
2785 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2786 
2787 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2788 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2789 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2790 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2791 
2792 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2793 		 qp_attr->cap.max_inline_data);
2794 
2795 err:
2796 	return rc;
2797 }
2798 
2799 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2800 {
2801 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2802 	struct qedr_dev *dev = qp->dev;
2803 	struct ib_qp_attr attr;
2804 	int attr_mask = 0;
2805 
2806 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2807 		 qp, qp->qp_type);
2808 
2809 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2810 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2811 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2812 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2813 
2814 			attr.qp_state = IB_QPS_ERR;
2815 			attr_mask |= IB_QP_STATE;
2816 
2817 			/* Change the QP state to ERROR */
2818 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2819 		}
2820 	} else {
2821 		/* If connection establishment has started, the WAIT_FOR_CONNECT
2822 		 * bit will be on and we need to wait for the establishment
2823 		 * to complete before destroying the qp.
2824 		 */
2825 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2826 				     &qp->iwarp_cm_flags))
2827 			wait_for_completion(&qp->iwarp_cm_comp);
2828 
2829 		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2830 		 * bit will be on, and we need to wait for the disconnect to
2831 		 * complete before continuing. We can use the same completion,
2832 		 * iwarp_cm_comp, since this is the only place that waits for
2833 		 * this completion and it is sequential. In addition,
2834 		 * disconnect can't occur before the connection is fully
2835 		 * established, therefore if WAIT_FOR_DISCONNECT is on it
2836 		 * means WAIT_FOR_CONNECT is also on and the completion for
2837 		 * CONNECT already occurred.
2838 		 */
2839 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2840 				     &qp->iwarp_cm_flags))
2841 			wait_for_completion(&qp->iwarp_cm_comp);
2842 	}
2843 
2844 	if (qp->qp_type == IB_QPT_GSI)
2845 		qedr_destroy_gsi_qp(dev);
2846 
2847 	/* We need to remove the entry from the xarray before we release the
2848 	 * qp_id to avoid a race of the qp_id being reallocated and failing
2849 	 * on xa_insert
2850 	 */
2851 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2852 		xa_erase(&dev->qps, qp->qp_id);
2853 
2854 	qedr_free_qp_resources(dev, qp, udata);
2855 
2856 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2857 		qedr_iw_qp_rem_ref(&qp->ibqp);
2858 		wait_for_completion(&qp->qp_rel_comp);
2859 	}
2860 
2861 	return 0;
2862 }
2863 
2864 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2865 		   struct ib_udata *udata)
2866 {
2867 	struct qedr_ah *ah = get_qedr_ah(ibah);
2868 
2869 	rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
2870 
2871 	return 0;
2872 }
2873 
2874 int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2875 {
2876 	struct qedr_ah *ah = get_qedr_ah(ibah);
2877 
2878 	rdma_destroy_ah_attr(&ah->attr);
2879 	return 0;
2880 }
2881 
2882 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2883 {
2884 	struct qedr_pbl *pbl, *tmp;
2885 
2886 	if (info->pbl_table)
2887 		list_add_tail(&info->pbl_table->list_entry,
2888 			      &info->free_pbl_list);
2889 
2890 	if (!list_empty(&info->inuse_pbl_list))
2891 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2892 
2893 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2894 		list_del(&pbl->list_entry);
2895 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2896 	}
2897 }
2898 
2899 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2900 			size_t page_list_len, bool two_layered)
2901 {
2902 	struct qedr_pbl *tmp;
2903 	int rc;
2904 
2905 	INIT_LIST_HEAD(&info->free_pbl_list);
2906 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2907 
2908 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2909 				  page_list_len, two_layered);
2910 	if (rc)
2911 		goto done;
2912 
2913 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2914 	if (IS_ERR(info->pbl_table)) {
2915 		rc = PTR_ERR(info->pbl_table);
2916 		goto done;
2917 	}
2918 
2919 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2920 		 &info->pbl_table->pa);
2921 
2922 	/* In the usual case we use 2 PBLs, so we add one to the free
2923 	 * list and allocate another one
2924 	 */
2925 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2926 	if (IS_ERR(tmp)) {
2927 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2928 		goto done;
2929 	}
2930 
2931 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2932 
2933 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2934 
2935 done:
2936 	if (rc)
2937 		free_mr_info(dev, info);
2938 
2939 	return rc;
2940 }
2941 
2942 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2943 			       u64 usr_addr, int acc, struct ib_udata *udata)
2944 {
2945 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2946 	struct qedr_mr *mr;
2947 	struct qedr_pd *pd;
2948 	int rc = -ENOMEM;
2949 
2950 	pd = get_qedr_pd(ibpd);
2951 	DP_DEBUG(dev, QEDR_MSG_MR,
2952 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2953 		 pd->pd_id, start, len, usr_addr, acc);
2954 
2955 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2956 		return ERR_PTR(-EINVAL);
2957 
2958 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2959 	if (!mr)
2960 		return ERR_PTR(rc);
2961 
2962 	mr->type = QEDR_MR_USER;
2963 
2964 	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2965 	if (IS_ERR(mr->umem)) {
2966 		rc = -EFAULT;
2967 		goto err0;
2968 	}
2969 
2970 	rc = init_mr_info(dev, &mr->info,
2971 			  ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1);
2972 	if (rc)
2973 		goto err1;
2974 
2975 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2976 			   &mr->info.pbl_info, PAGE_SHIFT);
2977 
2978 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2979 	if (rc) {
2980 		if (rc == -EINVAL)
2981 			DP_ERR(dev, "Out of MR resources\n");
2982 		else
2983 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
2984 
2985 		goto err1;
2986 	}
2987 
2988 	/* Index only, 18 bit long, lkey = itid << 8 | key */
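	/* For example, itid = 0x1234 with key = 0 yields lkey = 0x123400;
	 * the low 8 bits hold the key and the upper bits hold the HW itid.
	 */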
2989 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2990 	mr->hw_mr.key = 0;
2991 	mr->hw_mr.pd = pd->pd_id;
2992 	mr->hw_mr.local_read = 1;
2993 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2994 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2995 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2996 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2997 	mr->hw_mr.mw_bind = false;
2998 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2999 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
3000 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
3001 	mr->hw_mr.page_size_log = PAGE_SHIFT;
3002 	mr->hw_mr.length = len;
3003 	mr->hw_mr.vaddr = usr_addr;
3004 	mr->hw_mr.phy_mr = false;
3005 	mr->hw_mr.dma_mr = false;
3006 
3007 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3008 	if (rc) {
3009 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3010 		goto err2;
3011 	}
3012 
3013 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3014 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3015 	    mr->hw_mr.remote_atomic)
3016 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3017 
3018 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
3019 		 mr->ibmr.lkey);
3020 	return &mr->ibmr;
3021 
3022 err2:
3023 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3024 err1:
3025 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
3026 err0:
3027 	kfree(mr);
3028 	return ERR_PTR(rc);
3029 }
3030 
3031 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
3032 {
3033 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
3034 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
3035 	int rc = 0;
3036 
3037 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
3038 	if (rc)
3039 		return rc;
3040 
3041 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3042 
3043 	if (mr->type != QEDR_MR_DMA)
3044 		free_mr_info(dev, &mr->info);
3045 
3046 	/* it could be user registered memory. */
3047 	ib_umem_release(mr->umem);
3048 
3049 	kfree(mr);
3050 
3051 	return rc;
3052 }
3053 
3054 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
3055 				       int max_page_list_len)
3056 {
3057 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3058 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3059 	struct qedr_mr *mr;
3060 	int rc = -ENOMEM;
3061 
3062 	DP_DEBUG(dev, QEDR_MSG_MR,
3063 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
3064 		 max_page_list_len);
3065 
3066 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3067 	if (!mr)
3068 		return ERR_PTR(rc);
3069 
3070 	mr->dev = dev;
3071 	mr->type = QEDR_MR_FRMR;
3072 
3073 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
3074 	if (rc)
3075 		goto err0;
3076 
3077 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3078 	if (rc) {
3079 		if (rc == -EINVAL)
3080 			DP_ERR(dev, "Out of MR resources\n");
3081 		else
3082 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
3083 
3084 		goto err0;
3085 	}
3086 
3087 	/* Index only, 18 bit long, lkey = itid << 8 | key */
3088 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
3089 	mr->hw_mr.key = 0;
3090 	mr->hw_mr.pd = pd->pd_id;
3091 	mr->hw_mr.local_read = 1;
3092 	mr->hw_mr.local_write = 0;
3093 	mr->hw_mr.remote_read = 0;
3094 	mr->hw_mr.remote_write = 0;
3095 	mr->hw_mr.remote_atomic = 0;
3096 	mr->hw_mr.mw_bind = false;
3097 	mr->hw_mr.pbl_ptr = 0;
3098 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
3099 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
3100 	mr->hw_mr.length = 0;
3101 	mr->hw_mr.vaddr = 0;
3102 	mr->hw_mr.phy_mr = true;
3103 	mr->hw_mr.dma_mr = false;
3104 
3105 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3106 	if (rc) {
3107 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3108 		goto err1;
3109 	}
3110 
3111 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3112 	mr->ibmr.rkey = mr->ibmr.lkey;
3113 
3114 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
3115 	return mr;
3116 
3117 err1:
3118 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3119 err0:
3120 	kfree(mr);
3121 	return ERR_PTR(rc);
3122 }
3123 
3124 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3125 			    u32 max_num_sg)
3126 {
3127 	struct qedr_mr *mr;
3128 
3129 	if (mr_type != IB_MR_TYPE_MEM_REG)
3130 		return ERR_PTR(-EINVAL);
3131 
3132 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
3133 
3134 	if (IS_ERR(mr))
3135 		return ERR_PTR(-EINVAL);
3136 
3137 	return &mr->ibmr;
3138 }
3139 
3140 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3141 {
3142 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3143 	struct qedr_pbl *pbl_table;
3144 	struct regpair *pbe;
3145 	u32 pbes_in_page;
3146 
3147 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3148 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
3149 		return -ENOMEM;
3150 	}
3151 
3152 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3153 		 mr->npages, addr);
3154 
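	/* Locate the PBL page and slot for this PBE. For example (values
	 * illustrative only), a 4 KiB pbl_size holds 512 u64 PBEs per page,
	 * so npages = 600 lands in pbl_table[1] at offset 88.
	 */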
3155 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3156 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3157 	pbe = (struct regpair *)pbl_table->va;
3158 	pbe +=  mr->npages % pbes_in_page;
3159 	pbe->lo = cpu_to_le32((u32)addr);
3160 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3161 
3162 	mr->npages++;
3163 
3164 	return 0;
3165 }
3166 
3167 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3168 {
3169 	int work = info->completed - info->completed_handled - 1;
3170 
3171 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3172 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3173 		struct qedr_pbl *pbl;
3174 
3175 		/* Free all the page lists that can be freed (all the ones
3176 		 * that were invalidated), under the assumption that if an
3177 		 * FMR completed successfully, then any invalidate operation
3178 		 * issued before it has completed as well.
3179 		 */
3180 		pbl = list_first_entry(&info->inuse_pbl_list,
3181 				       struct qedr_pbl, list_entry);
3182 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3183 		info->completed_handled++;
3184 	}
3185 }
3186 
3187 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3188 		   int sg_nents, unsigned int *sg_offset)
3189 {
3190 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3191 
3192 	mr->npages = 0;
3193 
3194 	handle_completed_mrs(mr->dev, &mr->info);
3195 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3196 }
3197 
3198 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3199 {
3200 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3201 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3202 	struct qedr_mr *mr;
3203 	int rc;
3204 
3205 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3206 	if (!mr)
3207 		return ERR_PTR(-ENOMEM);
3208 
3209 	mr->type = QEDR_MR_DMA;
3210 
3211 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3212 	if (rc) {
3213 		if (rc == -EINVAL)
3214 			DP_ERR(dev, "Out of MR resources\n");
3215 		else
3216 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
3217 
3218 		goto err1;
3219 	}
3220 
3221 	/* index only, 18 bit long, lkey = itid << 8 | key */
3222 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3223 	mr->hw_mr.pd = pd->pd_id;
3224 	mr->hw_mr.local_read = 1;
3225 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3226 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3227 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3228 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3229 	mr->hw_mr.dma_mr = true;
3230 
3231 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3232 	if (rc) {
3233 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3234 		goto err2;
3235 	}
3236 
3237 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3238 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3239 	    mr->hw_mr.remote_atomic)
3240 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3241 
3242 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3243 	return &mr->ibmr;
3244 
3245 err2:
3246 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3247 err1:
3248 	kfree(mr);
3249 	return ERR_PTR(rc);
3250 }
3251 
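/*
 * The work queue is treated as full when advancing prod would make it equal
 * to cons, i.e. one slot is always left unused. For example, with max_wr = 4,
 * prod = 3 and cons = 0, the queue is full after three posted WRs.
 */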
3252 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3253 {
3254 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3255 }
3256 
3257 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3258 {
3259 	int i, len = 0;
3260 
3261 	for (i = 0; i < num_sge; i++)
3262 		len += sg_list[i].length;
3263 
3264 	return len;
3265 }
3266 
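/*
 * Note: cpu_to_le64() is a no-op on little-endian hosts and a byte swap on
 * big-endian ones (and vice versa for cpu_to_be64()), so the composition
 * below byte-swaps every 64-bit word regardless of host endianness,
 * presumably to put the inline payload in the byte order the device expects.
 */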
3267 static void swap_wqe_data64(u64 *p)
3268 {
3269 	int i;
3270 
3271 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3272 		*p = cpu_to_be64(cpu_to_le64(*p));
3273 }
3274 
3275 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3276 				       struct qedr_qp *qp, u8 *wqe_size,
3277 				       const struct ib_send_wr *wr,
3278 				       const struct ib_send_wr **bad_wr,
3279 				       u8 *bits, u8 bit)
3280 {
3281 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3282 	char *seg_prt, *wqe;
3283 	int i, seg_siz;
3284 
3285 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3286 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3287 		*bad_wr = wr;
3288 		return 0;
3289 	}
3290 
3291 	if (!data_size)
3292 		return data_size;
3293 
3294 	*bits |= bit;
3295 
3296 	seg_prt = NULL;
3297 	wqe = NULL;
3298 	seg_siz = 0;
3299 
3300 	/* Copy data inline */
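	/* The inline payload is packed into successive SQ chain elements;
	 * each produced element contributes sizeof(struct rdma_sq_common_wqe)
	 * bytes of space and bumps *wqe_size by one.
	 */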
3301 	for (i = 0; i < wr->num_sge; i++) {
3302 		u32 len = wr->sg_list[i].length;
3303 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3304 
3305 		while (len > 0) {
3306 			u32 cur;
3307 
3308 			/* New segment required */
3309 			if (!seg_siz) {
3310 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3311 				seg_prt = wqe;
3312 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3313 				(*wqe_size)++;
3314 			}
3315 
3316 			/* Calculate currently allowed length */
3317 			cur = min_t(u32, len, seg_siz);
3318 			memcpy(seg_prt, src, cur);
3319 
3320 			/* Update segment variables */
3321 			seg_prt += cur;
3322 			seg_siz -= cur;
3323 
3324 			/* Update sge variables */
3325 			src += cur;
3326 			len -= cur;
3327 
3328 			/* Swap fully-completed segments */
3329 			if (!seg_siz)
3330 				swap_wqe_data64((u64 *)wqe);
3331 		}
3332 	}
3333 
3334 	/* swap last not completed segment */
3335 	if (seg_siz)
3336 		swap_wqe_data64((u64 *)wqe);
3337 
3338 	return data_size;
3339 }
3340 
3341 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3342 	do {							\
3343 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3344 		(sge)->length = cpu_to_le32(vlength);		\
3345 		(sge)->flags = cpu_to_le32(vflags);		\
3346 	} while (0)
3347 
3348 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3349 	do {							\
3350 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3351 		(hdr)->num_sges = num_sge;			\
3352 	} while (0)
3353 
3354 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3355 	do {							\
3356 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3357 		(sge)->length = cpu_to_le32(vlength);		\
3358 		(sge)->l_key = cpu_to_le32(vlkey);		\
3359 	} while (0)
3360 
3361 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3362 				const struct ib_send_wr *wr)
3363 {
3364 	u32 data_size = 0;
3365 	int i;
3366 
3367 	for (i = 0; i < wr->num_sge; i++) {
3368 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3369 
3370 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3371 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3372 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3373 		data_size += wr->sg_list[i].length;
3374 	}
3375 
3376 	if (wqe_size)
3377 		*wqe_size += wr->num_sge;
3378 
3379 	return data_size;
3380 }
3381 
3382 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3383 				     struct qedr_qp *qp,
3384 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3385 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3386 				     const struct ib_send_wr *wr,
3387 				     const struct ib_send_wr **bad_wr)
3388 {
3389 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3390 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3391 
3392 	if (wr->send_flags & IB_SEND_INLINE &&
3393 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3394 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3395 		u8 flags = 0;
3396 
3397 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3398 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3399 						   bad_wr, &rwqe->flags, flags);
3400 	}
3401 
3402 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3403 }
3404 
3405 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3406 				     struct qedr_qp *qp,
3407 				     struct rdma_sq_send_wqe_1st *swqe,
3408 				     struct rdma_sq_send_wqe_2st *swqe2,
3409 				     const struct ib_send_wr *wr,
3410 				     const struct ib_send_wr **bad_wr)
3411 {
3412 	memset(swqe2, 0, sizeof(*swqe2));
3413 	if (wr->send_flags & IB_SEND_INLINE) {
3414 		u8 flags = 0;
3415 
3416 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3417 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3418 						   bad_wr, &swqe->flags, flags);
3419 	}
3420 
3421 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3422 }
3423 
3424 static int qedr_prepare_reg(struct qedr_qp *qp,
3425 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3426 			    const struct ib_reg_wr *wr)
3427 {
3428 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3429 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3430 
3431 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3432 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3433 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3434 	fwqe1->l_key = wr->key;
3435 
3436 	fwqe2->access_ctrl = 0;
3437 
3438 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3439 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3440 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3441 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3442 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3443 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3444 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3445 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3446 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3447 	fwqe2->fmr_ctrl = 0;
3448 
3449 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3450 		   ilog2(mr->ibmr.page_size) - 12);
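	/* The "- 12" suggests the FW encodes the page size as a log2 relative
	 * to 4 KiB (2^12) pages; this is inferred from the constant here
	 * rather than stated in this file.
	 */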
3451 
3452 	fwqe2->length_hi = 0;
3453 	fwqe2->length_lo = mr->ibmr.length;
3454 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3455 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3456 
3457 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3458 
3459 	return 0;
3460 }
3461 
3462 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3463 {
3464 	switch (opcode) {
3465 	case IB_WR_RDMA_WRITE:
3466 	case IB_WR_RDMA_WRITE_WITH_IMM:
3467 		return IB_WC_RDMA_WRITE;
3468 	case IB_WR_SEND_WITH_IMM:
3469 	case IB_WR_SEND:
3470 	case IB_WR_SEND_WITH_INV:
3471 		return IB_WC_SEND;
3472 	case IB_WR_RDMA_READ:
3473 	case IB_WR_RDMA_READ_WITH_INV:
3474 		return IB_WC_RDMA_READ;
3475 	case IB_WR_ATOMIC_CMP_AND_SWP:
3476 		return IB_WC_COMP_SWAP;
3477 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3478 		return IB_WC_FETCH_ADD;
3479 	case IB_WR_REG_MR:
3480 		return IB_WC_REG_MR;
3481 	case IB_WR_LOCAL_INV:
3482 		return IB_WC_LOCAL_INV;
3483 	default:
3484 		return IB_WC_SEND;
3485 	}
3486 }
3487 
3488 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3489 				      const struct ib_send_wr *wr)
3490 {
3491 	int wq_is_full, err_wr, pbl_is_full;
3492 	struct qedr_dev *dev = qp->dev;
3493 
3494 	/* prevent SQ overflow and/or processing of a bad WR */
3495 	err_wr = wr->num_sge > qp->sq.max_sges;
3496 	wq_is_full = qedr_wq_is_full(&qp->sq);
3497 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3498 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3499 	if (wq_is_full || err_wr || pbl_is_full) {
3500 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3501 			DP_ERR(dev,
3502 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3503 			       qp);
3504 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3505 		}
3506 
3507 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3508 			DP_ERR(dev,
3509 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3510 			       qp);
3511 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3512 		}
3513 
3514 		if (pbl_is_full &&
3515 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3516 			DP_ERR(dev,
3517 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3518 			       qp);
3519 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3520 		}
3521 		return false;
3522 	}
3523 	return true;
3524 }
3525 
3526 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3527 			    const struct ib_send_wr **bad_wr)
3528 {
3529 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3530 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3531 	struct rdma_sq_atomic_wqe_1st *awqe1;
3532 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3533 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3534 	struct rdma_sq_send_wqe_2st *swqe2;
3535 	struct rdma_sq_local_inv_wqe *iwqe;
3536 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3537 	struct rdma_sq_send_wqe_1st *swqe;
3538 	struct rdma_sq_rdma_wqe_1st *rwqe;
3539 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3540 	struct rdma_sq_common_wqe *wqe;
3541 	u32 length;
3542 	int rc = 0;
3543 	bool comp;
3544 
3545 	if (!qedr_can_post_send(qp, wr)) {
3546 		*bad_wr = wr;
3547 		return -ENOMEM;
3548 	}
3549 
3550 	wqe = qed_chain_produce(&qp->sq.pbl);
3551 	qp->wqe_wr_id[qp->sq.prod].signaled =
3552 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3553 
3554 	wqe->flags = 0;
3555 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3556 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3557 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3558 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3559 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3560 		   !!(wr->send_flags & IB_SEND_FENCE));
3561 	wqe->prev_wqe_size = qp->prev_wqe_size;
3562 
3563 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3564 
3565 	switch (wr->opcode) {
3566 	case IB_WR_SEND_WITH_IMM:
3567 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3568 			rc = -EINVAL;
3569 			*bad_wr = wr;
3570 			break;
3571 		}
3572 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3573 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3574 		swqe->wqe_size = 2;
3575 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3576 
3577 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3578 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3579 						   wr, bad_wr);
3580 		swqe->length = cpu_to_le32(length);
3581 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3582 		qp->prev_wqe_size = swqe->wqe_size;
3583 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3584 		break;
3585 	case IB_WR_SEND:
3586 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3587 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3588 
3589 		swqe->wqe_size = 2;
3590 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3591 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3592 						   wr, bad_wr);
3593 		swqe->length = cpu_to_le32(length);
3594 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3595 		qp->prev_wqe_size = swqe->wqe_size;
3596 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3597 		break;
3598 	case IB_WR_SEND_WITH_INV:
3599 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3600 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3601 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3602 		swqe->wqe_size = 2;
3603 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3604 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3605 						   wr, bad_wr);
3606 		swqe->length = cpu_to_le32(length);
3607 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3608 		qp->prev_wqe_size = swqe->wqe_size;
3609 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3610 		break;
3611 
3612 	case IB_WR_RDMA_WRITE_WITH_IMM:
3613 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3614 			rc = -EINVAL;
3615 			*bad_wr = wr;
3616 			break;
3617 		}
3618 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3619 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3620 
3621 		rwqe->wqe_size = 2;
3622 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3623 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3624 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3625 						   wr, bad_wr);
3626 		rwqe->length = cpu_to_le32(length);
3627 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3628 		qp->prev_wqe_size = rwqe->wqe_size;
3629 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3630 		break;
3631 	case IB_WR_RDMA_WRITE:
3632 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3633 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3634 
3635 		rwqe->wqe_size = 2;
3636 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3637 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3638 						   wr, bad_wr);
3639 		rwqe->length = cpu_to_le32(length);
3640 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3641 		qp->prev_wqe_size = rwqe->wqe_size;
3642 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3643 		break;
3644 	case IB_WR_RDMA_READ_WITH_INV:
3645 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3646 		fallthrough;	/* the rest is handled identically to RDMA READ */
3647 
3648 	case IB_WR_RDMA_READ:
3649 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3650 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3651 
3652 		rwqe->wqe_size = 2;
3653 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3654 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3655 						   wr, bad_wr);
3656 		rwqe->length = cpu_to_le32(length);
3657 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3658 		qp->prev_wqe_size = rwqe->wqe_size;
3659 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3660 		break;
3661 
3662 	case IB_WR_ATOMIC_CMP_AND_SWP:
3663 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3664 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3665 		awqe1->wqe_size = 4;
3666 
3667 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3668 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3669 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3670 
3671 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3672 
3673 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3674 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3675 			DMA_REGPAIR_LE(awqe3->swap_data,
3676 				       atomic_wr(wr)->compare_add);
3677 		} else {
3678 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3679 			DMA_REGPAIR_LE(awqe3->swap_data,
3680 				       atomic_wr(wr)->swap);
3681 			DMA_REGPAIR_LE(awqe3->cmp_data,
3682 				       atomic_wr(wr)->compare_add);
3683 		}
3684 
3685 		qedr_prepare_sq_sges(qp, NULL, wr);
3686 
3687 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3688 		qp->prev_wqe_size = awqe1->wqe_size;
3689 		break;
3690 
3691 	case IB_WR_LOCAL_INV:
3692 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3693 		iwqe->wqe_size = 1;
3694 
3695 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3696 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3697 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3698 		qp->prev_wqe_size = iwqe->wqe_size;
3699 		break;
3700 	case IB_WR_REG_MR:
3701 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3702 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3703 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3704 		fwqe1->wqe_size = 2;
3705 
3706 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3707 		if (rc) {
3708 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3709 			*bad_wr = wr;
3710 			break;
3711 		}
3712 
3713 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3714 		qp->prev_wqe_size = fwqe1->wqe_size;
3715 		break;
3716 	default:
3717 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3718 		rc = -EINVAL;
3719 		*bad_wr = wr;
3720 		break;
3721 	}
3722 
3723 	if (*bad_wr) {
3724 		u16 value;
3725 
3726 		/* Restore prod to its position before
3727 		 * this WR was processed
3728 		 */
3729 		value = le16_to_cpu(qp->sq.db_data.data.value);
3730 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3731 
3732 		/* Restore prev_wqe_size */
3733 		qp->prev_wqe_size = wqe->prev_wqe_size;
3734 		rc = -EINVAL;
3735 		DP_ERR(dev, "POST SEND FAILED\n");
3736 	}
3737 
3738 	return rc;
3739 }
3740 
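/* Descriptive note (added): post a list of send WRs. GSI QPs are handled by
 * the dedicated GSI path. For RoCE the QP must be in the RTS, SQD or ERR
 * state. The SQ doorbell is rung once, after the whole list has been
 * processed.
 */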
3741 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3742 		   const struct ib_send_wr **bad_wr)
3743 {
3744 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3745 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3746 	unsigned long flags;
3747 	int rc = 0;
3748 
3749 	*bad_wr = NULL;
3750 
3751 	if (qp->qp_type == IB_QPT_GSI)
3752 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3753 
3754 	spin_lock_irqsave(&qp->q_lock, flags);
3755 
3756 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3757 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3758 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3759 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3760 			spin_unlock_irqrestore(&qp->q_lock, flags);
3761 			*bad_wr = wr;
3762 			DP_DEBUG(dev, QEDR_MSG_CQ,
3763 				 "QP in wrong state! QP icid=0x%x state %d\n",
3764 				 qp->icid, qp->state);
3765 			return -EINVAL;
3766 		}
3767 	}
3768 
3769 	while (wr) {
3770 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3771 		if (rc)
3772 			break;
3773 
3774 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3775 
3776 		qedr_inc_sw_prod(&qp->sq);
3777 
3778 		qp->sq.db_data.data.value++;
3779 
3780 		wr = wr->next;
3781 	}
3782 
3783 	/* Trigger doorbell
3784 	 * If there was a failure in the first WR then it will be triggered in
3785 	 * vain. However, this is not harmful (as long as the producer value is
3786 	 * unchanged). For performance reasons we avoid checking for this
3787 	 * redundant doorbell.
3788 	 *
3789 	 * qp->wqe_wr_id is accessed during qedr_poll_cq. As soon as we ring
3790 	 * the doorbell, we could get a completion for this wr, therefore we
3791 	 * need to make sure that the memory is updated before ringing the
3792 	 * doorbell.
3793 	 * During qedr_poll_cq, an rmb is called before accessing the
3794 	 * cqe. This covers for the smp_rmb as well.
3795 	 */
3796 	smp_wmb();
3797 	writel(qp->sq.db_data.raw, qp->sq.db);
3798 
3799 	spin_unlock_irqrestore(&qp->q_lock, flags);
3800 
3801 	return rc;
3802 }
3803 
3804 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3805 {
3806 	u32 used;
3807 
3808 	/* Calculate the number of elements in use from the producer and
3809 	 * consumer counts, and subtract it from the maximum number of work
3810 	 * requests supported to get the number of elements left.
3811 	 */
3812 	used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
3813 
3814 	return hw_srq->max_wr - used;
3815 }
3816 
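/* Descriptive note (added): post a list of receive WRs to an SRQ. For each
 * WR a WQE header and its SGEs are written to the SRQ PBL, and then the
 * producer pair is updated: the SGE producer is written first, followed by
 * the WQE producer, with dma_wmb() ordering in between.
 */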
3817 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3818 		       const struct ib_recv_wr **bad_wr)
3819 {
3820 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3821 	struct qedr_srq_hwq_info *hw_srq;
3822 	struct qedr_dev *dev = srq->dev;
3823 	struct qed_chain *pbl;
3824 	unsigned long flags;
3825 	int status = 0;
3826 	u32 num_sge;
3827 
3828 	spin_lock_irqsave(&srq->lock, flags);
3829 
3830 	hw_srq = &srq->hw_srq;
3831 	pbl = &srq->hw_srq.pbl;
3832 	while (wr) {
3833 		struct rdma_srq_wqe_header *hdr;
3834 		int i;
3835 
3836 		if (!qedr_srq_elem_left(hw_srq) ||
3837 		    wr->num_sge > srq->hw_srq.max_sges) {
3838 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3839 			       hw_srq->wr_prod_cnt,
3840 			       atomic_read(&hw_srq->wr_cons_cnt),
3841 			       wr->num_sge, srq->hw_srq.max_sges);
3842 			status = -ENOMEM;
3843 			*bad_wr = wr;
3844 			break;
3845 		}
3846 
3847 		hdr = qed_chain_produce(pbl);
3848 		num_sge = wr->num_sge;
3849 		/* Set the number of SGEs and the work request id in the header */
3850 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3851 
3852 		srq->hw_srq.wr_prod_cnt++;
3853 		hw_srq->wqe_prod++;
3854 		hw_srq->sge_prod++;
3855 
3856 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3857 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3858 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3859 
3860 		for (i = 0; i < wr->num_sge; i++) {
3861 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3862 
3863 			/* Set SGE length, lkey and address */
3864 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3865 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3866 
3867 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3868 				 "[%d]: len %d key %x addr %x:%x\n",
3869 				 i, srq_sge->length, srq_sge->l_key,
3870 				 srq_sge->addr.hi, srq_sge->addr.lo);
3871 			hw_srq->sge_prod++;
3872 		}
3873 
3874 		/* Update WQE and SGE information before
3875 		 * updating producer.
3876 		 */
3877 		dma_wmb();
3878 
3879 		/* The SRQ producer is 8 bytes: the SGE producer index is in the
3880 		 * first 4 bytes and the WQE producer is in the
3881 		 * next 4 bytes.
3882 		 */
3883 		srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod);
3884 		/* Make sure sge producer is updated first */
3885 		dma_wmb();
3886 		srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod);
3887 
3888 		wr = wr->next;
3889 	}
3890 
3891 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3892 		 qed_chain_get_elem_left(pbl));
3893 	spin_unlock_irqrestore(&srq->lock, flags);
3894 
3895 	return status;
3896 }
3897 
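/* Descriptive note (added): post a list of receive WRs to a regular RQ. The
 * RQ doorbell is rung per WR, and for iWARP a second doorbell is rung as
 * well. Note the zero-SGE special case below, where a single zero-length SGE
 * is posted.
 */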
3898 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3899 		   const struct ib_recv_wr **bad_wr)
3900 {
3901 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3902 	struct qedr_dev *dev = qp->dev;
3903 	unsigned long flags;
3904 	int status = 0;
3905 
3906 	if (qp->qp_type == IB_QPT_GSI)
3907 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3908 
3909 	spin_lock_irqsave(&qp->q_lock, flags);
3910 
3911 	while (wr) {
3912 		int i;
3913 
3914 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3915 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3916 		    wr->num_sge > qp->rq.max_sges) {
3917 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3918 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3919 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3920 			       qp->rq.max_sges);
3921 			status = -ENOMEM;
3922 			*bad_wr = wr;
3923 			break;
3924 		}
3925 		for (i = 0; i < wr->num_sge; i++) {
3926 			u32 flags = 0;
3927 			struct rdma_rq_sge *rqe =
3928 			    qed_chain_produce(&qp->rq.pbl);
3929 
3930 			/* First one must include the number
3931 			 * of SGEs in the list
3932 			 */
3933 			if (!i)
3934 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3935 					  wr->num_sge);
3936 
3937 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3938 				  wr->sg_list[i].lkey);
3939 
3940 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3941 				   wr->sg_list[i].length, flags);
3942 		}
3943 
3944 		/* Special case of no SGEs. The FW requires between 1-4 SGEs,
3945 		 * so in this case we need to post one SGE with length zero. This
3946 		 * is because an RDMA write with immediate consumes an RQ element.
3947 		 */
3948 		if (!wr->num_sge) {
3949 			u32 flags = 0;
3950 			struct rdma_rq_sge *rqe =
3951 			    qed_chain_produce(&qp->rq.pbl);
3952 
3953 			/* First one must include the number
3954 			 * of SGEs in the list
3955 			 */
3956 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3957 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3958 
3959 			RQ_SGE_SET(rqe, 0, 0, flags);
3960 			i = 1;
3961 		}
3962 
3963 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3964 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3965 
3966 		qedr_inc_sw_prod(&qp->rq);
3967 
3968 		/* qp->rqe_wr_id is accessed during qedr_poll_cq. As soon as we
3969 		 * ring the doorbell, we could get a completion for this wr,
3970 		 * therefore we need to make sure that the memory is updated
3971 		 * before ringing the doorbell.
3972 		 * During qedr_poll_cq, an rmb is called before accessing the
3973 		 * cqe. This covers for the smp_rmb as well.
3974 		 */
3975 		smp_wmb();
3976 
3977 		qp->rq.db_data.data.value++;
3978 
3979 		writel(qp->rq.db_data.raw, qp->rq.db);
3980 
3981 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
3982 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3984 
3985 		wr = wr->next;
3986 	}
3987 
3988 	spin_unlock_irqrestore(&qp->q_lock, flags);
3989 
3990 	return status;
3991 }
3992 
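/* Descriptive note (added): a CQE is valid (i.e. still needs processing)
 * when its toggle bit matches the CQ's current PBL toggle.
 */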
3993 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3994 {
3995 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3996 
3997 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3998 		cq->pbl_toggle;
3999 }
4000 
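/* Descriptive note (added): the owning QP pointer is stashed in the CQE's
 * qp_handle as a hi/lo pair.
 */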
4001 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
4002 {
4003 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
4004 	struct qedr_qp *qp;
4005 
4006 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
4007 						   resp_cqe->qp_handle.lo,
4008 						   u64);
4009 	return qp;
4010 }
4011 
4012 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
4013 {
4014 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
4015 
4016 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
4017 }
4018 
4019 /* Return latest CQE (needs processing) */
4020 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
4021 {
4022 	return cq->latest_cqe;
4023 }
4024 
4025 /* For FMR we need to increase the FMR completed counter, which is used by
4026  * the FMR algorithm to determine whether a PBL can be freed or not.
4027  * We need to perform this whether the work request was signaled or not. For
4028  * this purpose we call this function from the condition that checks if a WR
4029  * should be skipped, to make sure we don't miss it (possibly this FMR
4030  * operation was not signaled).
4031  */
4032 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
4033 {
4034 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
4035 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
4036 }
4037 
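/* Descriptive note (added): consume requester-side completions up to
 * hw_cons, filling a WC entry for each signaled WR (or for every WR when
 * force is set) and advancing the SQ software consumer. Returns the number
 * of WC entries filled.
 */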
4038 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
4039 		       struct qedr_cq *cq, int num_entries,
4040 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
4041 		       int force)
4042 {
4043 	u16 cnt = 0;
4044 
4045 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
4046 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
4047 			qedr_chk_if_fmr(qp);
4048 			/* skip WC */
4049 			goto next_cqe;
4050 		}
4051 
4052 		/* fill WC */
4053 		wc->status = status;
4054 		wc->vendor_err = 0;
4055 		wc->wc_flags = 0;
4056 		wc->src_qp = qp->id;
4057 		wc->qp = &qp->ibqp;
4058 
4059 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
4060 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
4061 
4062 		switch (wc->opcode) {
4063 		case IB_WC_RDMA_WRITE:
4064 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
4065 			break;
4066 		case IB_WC_COMP_SWAP:
4067 		case IB_WC_FETCH_ADD:
4068 			wc->byte_len = 8;
4069 			break;
4070 		case IB_WC_REG_MR:
4071 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
4072 			break;
4073 		case IB_WC_RDMA_READ:
4074 		case IB_WC_SEND:
4075 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
4076 			break;
4077 		default:
4078 			break;
4079 		}
4080 
4081 		num_entries--;
4082 		wc++;
4083 		cnt++;
4084 next_cqe:
4085 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
4086 			qed_chain_consume(&qp->sq.pbl);
4087 		qedr_inc_sw_cons(&qp->sq);
4088 	}
4089 
4090 	return cnt;
4091 }
4092 
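/* Descriptive note (added): handle a requester CQE. On success or flush, all
 * WRs up to the reported SQ consumer are completed. For any other status the
 * QP is moved to the error state, the preceding WRs are completed as
 * successful and the failed WR is reported with the translated ib_wc_status.
 */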
4093 static int qedr_poll_cq_req(struct qedr_dev *dev,
4094 			    struct qedr_qp *qp, struct qedr_cq *cq,
4095 			    int num_entries, struct ib_wc *wc,
4096 			    struct rdma_cqe_requester *req)
4097 {
4098 	int cnt = 0;
4099 
4100 	switch (req->status) {
4101 	case RDMA_CQE_REQ_STS_OK:
4102 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4103 				  IB_WC_SUCCESS, 0);
4104 		break;
4105 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
4106 		if (qp->state != QED_ROCE_QP_STATE_ERR)
4107 			DP_DEBUG(dev, QEDR_MSG_CQ,
4108 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4109 				 cq->icid, qp->icid);
4110 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4111 				  IB_WC_WR_FLUSH_ERR, 1);
4112 		break;
4113 	default:
4114 		/* process all WQEs before the consumer */
4115 		qp->state = QED_ROCE_QP_STATE_ERR;
4116 		cnt = process_req(dev, qp, cq, num_entries, wc,
4117 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
4118 		wc += cnt;
4119 		/* if we have extra WC fill it with actual error info */
4120 		if (cnt < num_entries) {
4121 			enum ib_wc_status wc_status;
4122 
4123 			switch (req->status) {
4124 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4125 				DP_ERR(dev,
4126 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4127 				       cq->icid, qp->icid);
4128 				wc_status = IB_WC_BAD_RESP_ERR;
4129 				break;
4130 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4131 				DP_ERR(dev,
4132 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4133 				       cq->icid, qp->icid);
4134 				wc_status = IB_WC_LOC_LEN_ERR;
4135 				break;
4136 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4137 				DP_ERR(dev,
4138 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4139 				       cq->icid, qp->icid);
4140 				wc_status = IB_WC_LOC_QP_OP_ERR;
4141 				break;
4142 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4143 				DP_ERR(dev,
4144 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4145 				       cq->icid, qp->icid);
4146 				wc_status = IB_WC_LOC_PROT_ERR;
4147 				break;
4148 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4149 				DP_ERR(dev,
4150 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4151 				       cq->icid, qp->icid);
4152 				wc_status = IB_WC_MW_BIND_ERR;
4153 				break;
4154 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4155 				DP_ERR(dev,
4156 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4157 				       cq->icid, qp->icid);
4158 				wc_status = IB_WC_REM_INV_REQ_ERR;
4159 				break;
4160 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4161 				DP_ERR(dev,
4162 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4163 				       cq->icid, qp->icid);
4164 				wc_status = IB_WC_REM_ACCESS_ERR;
4165 				break;
4166 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4167 				DP_ERR(dev,
4168 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4169 				       cq->icid, qp->icid);
4170 				wc_status = IB_WC_REM_OP_ERR;
4171 				break;
4172 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4173 				DP_ERR(dev,
4174 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4175 				       cq->icid, qp->icid);
4176 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4177 				break;
4178 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4179 				DP_ERR(dev,
4180 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4181 				       cq->icid, qp->icid);
4182 				wc_status = IB_WC_RETRY_EXC_ERR;
4183 				break;
4184 			default:
4185 				DP_ERR(dev,
4186 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4187 				       cq->icid, qp->icid);
4188 				wc_status = IB_WC_GENERAL_ERR;
4189 			}
4190 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4191 					   wc_status, 1);
4192 		}
4193 	}
4194 
4195 	return cnt;
4196 }
4197 
4198 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4199 {
4200 	switch (status) {
4201 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4202 		return IB_WC_LOC_ACCESS_ERR;
4203 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4204 		return IB_WC_LOC_LEN_ERR;
4205 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4206 		return IB_WC_LOC_QP_OP_ERR;
4207 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4208 		return IB_WC_LOC_PROT_ERR;
4209 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4210 		return IB_WC_MW_BIND_ERR;
4211 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4212 		return IB_WC_REM_INV_RD_REQ_ERR;
4213 	case RDMA_CQE_RESP_STS_OK:
4214 		return IB_WC_SUCCESS;
4215 	default:
4216 		return IB_WC_GENERAL_ERR;
4217 	}
4218 }
4219 
4220 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4221 					  struct ib_wc *wc)
4222 {
4223 	wc->status = IB_WC_SUCCESS;
4224 	wc->byte_len = le32_to_cpu(resp->length);
4225 
4226 	if (resp->flags & QEDR_RESP_IMM) {
4227 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4228 		wc->wc_flags |= IB_WC_WITH_IMM;
4229 
4230 		if (resp->flags & QEDR_RESP_RDMA)
4231 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4232 
4233 		if (resp->flags & QEDR_RESP_INV)
4234 			return -EINVAL;
4235 
4236 	} else if (resp->flags & QEDR_RESP_INV) {
4237 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4238 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4239 
4240 		if (resp->flags & QEDR_RESP_RDMA)
4241 			return -EINVAL;
4242 
4243 	} else if (resp->flags & QEDR_RESP_RDMA) {
4244 		return -EINVAL;
4245 	}
4246 
4247 	return 0;
4248 }
4249 
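/* Descriptive note (added): fill a WC entry from a responder CQE. Set the
 * opcode and flags according to the immediate/invalidate bits and translate
 * the CQE status into an ib_wc_status.
 */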
4250 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4251 			       struct qedr_cq *cq, struct ib_wc *wc,
4252 			       struct rdma_cqe_responder *resp, u64 wr_id)
4253 {
4254 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4255 	wc->opcode = IB_WC_RECV;
4256 	wc->wc_flags = 0;
4257 
4258 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4259 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4260 			DP_ERR(dev,
4261 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4262 			       cq, cq->icid, resp->flags);
4263 
4264 	} else {
4265 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4266 		if (wc->status == IB_WC_GENERAL_ERR)
4267 			DP_ERR(dev,
4268 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4269 			       cq, cq->icid, resp->status);
4270 	}
4271 
4272 	/* Fill the rest of the WC */
4273 	wc->vendor_err = 0;
4274 	wc->src_qp = qp->id;
4275 	wc->qp = &qp->ibqp;
4276 	wc->wr_id = wr_id;
4277 }
4278 
4279 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4280 				struct qedr_cq *cq, struct ib_wc *wc,
4281 				struct rdma_cqe_responder *resp)
4282 {
4283 	struct qedr_srq *srq = qp->srq;
4284 	u64 wr_id;
4285 
4286 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4287 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4288 
4289 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4290 		wc->status = IB_WC_WR_FLUSH_ERR;
4291 		wc->vendor_err = 0;
4292 		wc->wr_id = wr_id;
4293 		wc->byte_len = 0;
4294 		wc->src_qp = qp->id;
4295 		wc->qp = &qp->ibqp;
4297 	} else {
4298 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4299 	}
4300 	atomic_inc(&srq->hw_srq.wr_cons_cnt);
4301 
4302 	return 1;
4303 }
4304 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4305 			    struct qedr_cq *cq, struct ib_wc *wc,
4306 			    struct rdma_cqe_responder *resp)
4307 {
4308 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4309 
4310 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4311 
4312 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4313 		qed_chain_consume(&qp->rq.pbl);
4314 	qedr_inc_sw_cons(&qp->rq);
4315 
4316 	return 1;
4317 }
4318 
4319 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4320 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4321 {
4322 	u16 cnt = 0;
4323 
4324 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4325 		/* fill WC */
4326 		wc->status = IB_WC_WR_FLUSH_ERR;
4327 		wc->vendor_err = 0;
4328 		wc->wc_flags = 0;
4329 		wc->src_qp = qp->id;
4330 		wc->byte_len = 0;
4331 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4332 		wc->qp = &qp->ibqp;
4333 		num_entries--;
4334 		wc++;
4335 		cnt++;
4336 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4337 			qed_chain_consume(&qp->rq.pbl);
4338 		qedr_inc_sw_cons(&qp->rq);
4339 	}
4340 
4341 	return cnt;
4342 }
4343 
4344 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4345 				 struct rdma_cqe_responder *resp, int *update)
4346 {
4347 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4348 		consume_cqe(cq);
4349 		*update |= 1;
4350 	}
4351 }
4352 
4353 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4354 				 struct qedr_cq *cq, int num_entries,
4355 				 struct ib_wc *wc,
4356 				 struct rdma_cqe_responder *resp)
4357 {
4358 	int cnt;
4359 
4360 	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4361 	consume_cqe(cq);
4362 
4363 	return cnt;
4364 }
4365 
4366 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4367 			     struct qedr_cq *cq, int num_entries,
4368 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4369 			     int *update)
4370 {
4371 	int cnt;
4372 
4373 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4374 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4375 					 resp->rq_cons_or_srq_id);
4376 		try_consume_resp_cqe(cq, qp, resp, update);
4377 	} else {
4378 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4379 		consume_cqe(cq);
4380 		*update |= 1;
4381 	}
4382 
4383 	return cnt;
4384 }
4385 
4386 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4387 				struct rdma_cqe_requester *req, int *update)
4388 {
4389 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4390 		consume_cqe(cq);
4391 		*update |= 1;
4392 	}
4393 }
4394 
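/* Descriptive note (added): poll the CQ. Walk the CQEs whose toggle bit
 * matches the current PBL toggle, dispatch each by type (requester,
 * responder RQ or responder SRQ) and, if anything was consumed, ring the CQ
 * doorbell with the updated consumer index.
 */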
4395 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4396 {
4397 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4398 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4399 	union rdma_cqe *cqe;
4400 	u32 old_cons, new_cons;
4401 	unsigned long flags;
4402 	int update = 0;
4403 	int done = 0;
4404 
4405 	if (cq->destroyed) {
4406 		DP_ERR(dev,
4407 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4408 		       cq, cq->icid);
4409 		return 0;
4410 	}
4411 
4412 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4413 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4414 
4415 	spin_lock_irqsave(&cq->cq_lock, flags);
4416 	cqe = cq->latest_cqe;
4417 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4418 	while (num_entries && is_valid_cqe(cq, cqe)) {
4419 		struct qedr_qp *qp;
4420 		int cnt = 0;
4421 
4422 		/* prevent speculative reads of any field of CQE */
4423 		rmb();
4424 
4425 		qp = cqe_get_qp(cqe);
4426 		if (!qp) {
4427 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4428 			break;
4429 		}
4430 
4431 		wc->qp = &qp->ibqp;
4432 
4433 		switch (cqe_get_type(cqe)) {
4434 		case RDMA_CQE_TYPE_REQUESTER:
4435 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4436 					       &cqe->req);
4437 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4438 			break;
4439 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4440 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4441 						&cqe->resp, &update);
4442 			break;
4443 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4444 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4445 						    wc, &cqe->resp);
4446 			update = 1;
4447 			break;
4448 		case RDMA_CQE_TYPE_INVALID:
4449 		default:
4450 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4451 			       cqe_get_type(cqe));
4452 		}
4453 		num_entries -= cnt;
4454 		wc += cnt;
4455 		done += cnt;
4456 
4457 		cqe = get_cqe(cq);
4458 	}
4459 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4460 
4461 	cq->cq_cons += new_cons - old_cons;
4462 
4463 	if (update)
4464 		/* doorbell notifies about the latest VALID entry,
4465 		 * but the chain already points to the next INVALID one
4466 		 */
4467 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4468 
4469 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4470 	return done;
4471 }
4472 
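/* Descriptive note (added): no MAD processing is performed here; simply
 * report success.
 */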
4473 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4474 		     u32 port_num, const struct ib_wc *in_wc,
4475 		     const struct ib_grh *in_grh, const struct ib_mad *in,
4476 		     struct ib_mad *out_mad, size_t *out_mad_size,
4477 		     u16 *out_mad_pkey_index)
4478 {
4479 	return IB_MAD_RESULT_SUCCESS;
4480 }
4481