xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 64794d6db49730d22f440aef0cf4da98a56a4ea3)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
55 
/* Size in bytes of one SRQ ring element (union rdma_srq_elm). */
#define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
/* Going by the name: maximum number of SGEs in one SRQ work request. */
#define	RDMA_MAX_SGE_PER_SRQ	(4)
/* One element more than the SGE limit (presumably room for a header
 * element - TODO confirm against the SRQ post path).
 */
#define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)

/* Shift a doorbell offset left by DB_PWM_ADDR_OFFSET_SHIFT. */
#define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
61 
/* Values stored in qedr_user_mmap_entry::mmap_flag; they select how
 * qedr_mmap() maps an entry and how qedr_mmap_free() releases it.
 */
enum {
	/* I/O region mapped write-combined via rdma_user_mmap_io() */
	QEDR_USER_MMAP_IO_WC = 0,
	/* Single kernel page inserted with vm_insert_page() */
	QEDR_USER_MMAP_PHYS_PAGE,
};
66 
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
68 					size_t len)
69 {
70 	size_t min_len = min_t(size_t, len, udata->outlen);
71 
72 	return ib_copy_to_udata(udata, src, min_len);
73 }
74 
75 int qedr_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
76 {
77 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
78 		return -EINVAL;
79 
80 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
81 	return 0;
82 }
83 
84 int qedr_iw_query_gid(struct ib_device *ibdev, u32 port,
85 		      int index, union ib_gid *sgid)
86 {
87 	struct qedr_dev *dev = get_qedr_dev(ibdev);
88 
89 	memset(sgid->raw, 0, sizeof(sgid->raw));
90 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
91 
92 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 		 sgid->global.interface_id, sgid->global.subnet_prefix);
94 
95 	return 0;
96 }
97 
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
99 {
100 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 	struct qedr_device_attr *qattr = &dev->attr;
102 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
103 
104 	srq_attr->srq_limit = srq->srq_limit;
105 	srq_attr->max_wr = qattr->max_srq_wr;
106 	srq_attr->max_sge = qattr->max_sge;
107 
108 	return 0;
109 }
110 
111 int qedr_query_device(struct ib_device *ibdev,
112 		      struct ib_device_attr *attr, struct ib_udata *udata)
113 {
114 	struct qedr_dev *dev = get_qedr_dev(ibdev);
115 	struct qedr_device_attr *qattr = &dev->attr;
116 
117 	if (!dev->rdma_ctx) {
118 		DP_ERR(dev,
119 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
120 		       dev->rdma_ctx);
121 		return -EINVAL;
122 	}
123 
124 	memset(attr, 0, sizeof(*attr));
125 
126 	attr->fw_ver = qattr->fw_ver;
127 	attr->sys_image_guid = qattr->sys_image_guid;
128 	attr->max_mr_size = qattr->max_mr_size;
129 	attr->page_size_cap = qattr->page_size_caps;
130 	attr->vendor_id = qattr->vendor_id;
131 	attr->vendor_part_id = qattr->vendor_part_id;
132 	attr->hw_ver = qattr->hw_ver;
133 	attr->max_qp = qattr->max_qp;
134 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 	    IB_DEVICE_RC_RNR_NAK_GEN |
137 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
138 
139 	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
140 		attr->device_cap_flags |= IB_DEVICE_XRC;
141 	attr->max_send_sge = qattr->max_sge;
142 	attr->max_recv_sge = qattr->max_sge;
143 	attr->max_sge_rd = qattr->max_sge;
144 	attr->max_cq = qattr->max_cq;
145 	attr->max_cqe = qattr->max_cqe;
146 	attr->max_mr = qattr->max_mr;
147 	attr->max_mw = qattr->max_mw;
148 	attr->max_pd = qattr->max_pd;
149 	attr->atomic_cap = dev->atomic_cap;
150 	attr->max_qp_init_rd_atom =
151 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
152 	attr->max_qp_rd_atom =
153 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
154 		attr->max_qp_init_rd_atom);
155 
156 	attr->max_srq = qattr->max_srq;
157 	attr->max_srq_sge = qattr->max_srq_sge;
158 	attr->max_srq_wr = qattr->max_srq_wr;
159 
160 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
161 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
162 	attr->max_pkeys = qattr->max_pkey;
163 	attr->max_ah = qattr->max_ah;
164 
165 	return 0;
166 }
167 
168 static inline void get_link_speed_and_width(int speed, u16 *ib_speed,
169 					    u8 *ib_width)
170 {
171 	switch (speed) {
172 	case 1000:
173 		*ib_speed = IB_SPEED_SDR;
174 		*ib_width = IB_WIDTH_1X;
175 		break;
176 	case 10000:
177 		*ib_speed = IB_SPEED_QDR;
178 		*ib_width = IB_WIDTH_1X;
179 		break;
180 
181 	case 20000:
182 		*ib_speed = IB_SPEED_DDR;
183 		*ib_width = IB_WIDTH_4X;
184 		break;
185 
186 	case 25000:
187 		*ib_speed = IB_SPEED_EDR;
188 		*ib_width = IB_WIDTH_1X;
189 		break;
190 
191 	case 40000:
192 		*ib_speed = IB_SPEED_QDR;
193 		*ib_width = IB_WIDTH_4X;
194 		break;
195 
196 	case 50000:
197 		*ib_speed = IB_SPEED_HDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 100000:
202 		*ib_speed = IB_SPEED_EDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	default:
207 		/* Unsupported */
208 		*ib_speed = IB_SPEED_SDR;
209 		*ib_width = IB_WIDTH_1X;
210 	}
211 }
212 
213 int qedr_query_port(struct ib_device *ibdev, u32 port,
214 		    struct ib_port_attr *attr)
215 {
216 	struct qedr_dev *dev;
217 	struct qed_rdma_port *rdma_port;
218 
219 	dev = get_qedr_dev(ibdev);
220 
221 	if (!dev->rdma_ctx) {
222 		DP_ERR(dev, "rdma_ctx is NULL\n");
223 		return -EINVAL;
224 	}
225 
226 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
227 
228 	/* *attr being zeroed by the caller, avoid zeroing it here */
229 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
230 		attr->state = IB_PORT_ACTIVE;
231 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
232 	} else {
233 		attr->state = IB_PORT_DOWN;
234 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
235 	}
236 	attr->max_mtu = IB_MTU_4096;
237 	attr->lid = 0;
238 	attr->lmc = 0;
239 	attr->sm_lid = 0;
240 	attr->sm_sl = 0;
241 	attr->ip_gids = true;
242 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
243 		attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu);
244 		attr->gid_tbl_len = 1;
245 	} else {
246 		attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
247 		attr->gid_tbl_len = QEDR_MAX_SGID;
248 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
249 	}
250 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
251 	attr->qkey_viol_cntr = 0;
252 	get_link_speed_and_width(rdma_port->link_speed,
253 				 &attr->active_speed, &attr->active_width);
254 	attr->max_msg_sz = rdma_port->max_msg_size;
255 	attr->max_vl_num = 4;
256 
257 	return 0;
258 }
259 
260 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
261 {
262 	struct ib_device *ibdev = uctx->device;
263 	int rc;
264 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
265 	struct qedr_alloc_ucontext_resp uresp = {};
266 	struct qedr_alloc_ucontext_req ureq = {};
267 	struct qedr_dev *dev = get_qedr_dev(ibdev);
268 	struct qed_rdma_add_user_out_params oparams;
269 	struct qedr_user_mmap_entry *entry;
270 
271 	if (!udata)
272 		return -EFAULT;
273 
274 	if (udata->inlen) {
275 		rc = ib_copy_from_udata(&ureq, udata,
276 					min(sizeof(ureq), udata->inlen));
277 		if (rc) {
278 			DP_ERR(dev, "Problem copying data from user space\n");
279 			return -EFAULT;
280 		}
281 		ctx->edpm_mode = !!(ureq.context_flags &
282 				    QEDR_ALLOC_UCTX_EDPM_MODE);
283 		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
284 	}
285 
286 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
287 	if (rc) {
288 		DP_ERR(dev,
289 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
290 		       rc);
291 		return rc;
292 	}
293 
294 	ctx->dpi = oparams.dpi;
295 	ctx->dpi_addr = oparams.dpi_addr;
296 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
297 	ctx->dpi_size = oparams.dpi_size;
298 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
299 	if (!entry) {
300 		rc = -ENOMEM;
301 		goto err;
302 	}
303 
304 	entry->io_address = ctx->dpi_phys_addr;
305 	entry->length = ctx->dpi_size;
306 	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
307 	entry->dpi = ctx->dpi;
308 	entry->dev = dev;
309 	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
310 					 ctx->dpi_size);
311 	if (rc) {
312 		kfree(entry);
313 		goto err;
314 	}
315 	ctx->db_mmap_entry = &entry->rdma_entry;
316 
317 	if (!dev->user_dpm_enabled)
318 		uresp.dpm_flags = 0;
319 	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
320 		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
321 	else
322 		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
323 				  QEDR_DPM_TYPE_ROCE_LEGACY |
324 				  QEDR_DPM_TYPE_ROCE_EDPM_MODE;
325 
326 	if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
327 		uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
328 		uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
329 		uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
330 		uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
331 	}
332 
333 	uresp.wids_enabled = 1;
334 	uresp.wid_count = oparams.wid_count;
335 	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
336 	uresp.db_size = ctx->dpi_size;
337 	uresp.max_send_wr = dev->attr.max_sqe;
338 	uresp.max_recv_wr = dev->attr.max_rqe;
339 	uresp.max_srq_wr = dev->attr.max_srq_wr;
340 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
341 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
342 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
343 	uresp.max_cqes = QEDR_MAX_CQES;
344 
345 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
346 	if (rc)
347 		goto err;
348 
349 	ctx->dev = dev;
350 
351 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
352 		 &ctx->ibucontext);
353 	return 0;
354 
355 err:
356 	if (!ctx->db_mmap_entry)
357 		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
358 	else
359 		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
360 
361 	return rc;
362 }
363 
364 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
365 {
366 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
367 
368 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
369 		 uctx);
370 
371 	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
372 }
373 
374 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
375 {
376 	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
377 	struct qedr_dev *dev = entry->dev;
378 
379 	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
380 		free_page((unsigned long)entry->address);
381 	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
382 		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
383 
384 	kfree(entry);
385 }
386 
387 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
388 {
389 	struct ib_device *dev = ucontext->device;
390 	size_t length = vma->vm_end - vma->vm_start;
391 	struct rdma_user_mmap_entry *rdma_entry;
392 	struct qedr_user_mmap_entry *entry;
393 	int rc = 0;
394 	u64 pfn;
395 
396 	ibdev_dbg(dev,
397 		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
398 		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
399 
400 	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
401 	if (!rdma_entry) {
402 		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
403 			  vma->vm_pgoff);
404 		return -EINVAL;
405 	}
406 	entry = get_qedr_mmap_entry(rdma_entry);
407 	ibdev_dbg(dev,
408 		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
409 		  entry->io_address, length, entry->mmap_flag);
410 
411 	switch (entry->mmap_flag) {
412 	case QEDR_USER_MMAP_IO_WC:
413 		pfn = entry->io_address >> PAGE_SHIFT;
414 		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
415 				       pgprot_writecombine(vma->vm_page_prot),
416 				       rdma_entry);
417 		break;
418 	case QEDR_USER_MMAP_PHYS_PAGE:
419 		rc = vm_insert_page(vma, vma->vm_start,
420 				    virt_to_page(entry->address));
421 		break;
422 	default:
423 		rc = -EINVAL;
424 	}
425 
426 	if (rc)
427 		ibdev_dbg(dev,
428 			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
429 			  entry->io_address, length, entry->mmap_flag, rc);
430 
431 	rdma_user_mmap_entry_put(rdma_entry);
432 	return rc;
433 }
434 
435 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
436 {
437 	struct ib_device *ibdev = ibpd->device;
438 	struct qedr_dev *dev = get_qedr_dev(ibdev);
439 	struct qedr_pd *pd = get_qedr_pd(ibpd);
440 	u16 pd_id;
441 	int rc;
442 
443 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
444 		 udata ? "User Lib" : "Kernel");
445 
446 	if (!dev->rdma_ctx) {
447 		DP_ERR(dev, "invalid RDMA context\n");
448 		return -EINVAL;
449 	}
450 
451 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
452 	if (rc)
453 		return rc;
454 
455 	pd->pd_id = pd_id;
456 
457 	if (udata) {
458 		struct qedr_alloc_pd_uresp uresp = {
459 			.pd_id = pd_id,
460 		};
461 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
462 			udata, struct qedr_ucontext, ibucontext);
463 
464 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
465 		if (rc) {
466 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
467 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
468 			return rc;
469 		}
470 
471 		pd->uctx = context;
472 		pd->uctx->pd = pd;
473 	}
474 
475 	return 0;
476 }
477 
478 int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
479 {
480 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
481 	struct qedr_pd *pd = get_qedr_pd(ibpd);
482 
483 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
484 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
485 	return 0;
486 }
487 
488 
489 int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
490 {
491 	struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
492 	struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd);
493 
494 	return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id);
495 }
496 
497 int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
498 {
499 	struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
500 	u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id;
501 
502 	dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id);
503 	return 0;
504 }
505 static void qedr_free_pbl(struct qedr_dev *dev,
506 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
507 {
508 	struct pci_dev *pdev = dev->pdev;
509 	int i;
510 
511 	for (i = 0; i < pbl_info->num_pbls; i++) {
512 		if (!pbl[i].va)
513 			continue;
514 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
515 				  pbl[i].va, pbl[i].pa);
516 	}
517 
518 	kfree(pbl);
519 }
520 
/* Smallest and largest PBL page sizes, in bytes, used when sizing a PBL. */
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

/* Number of 64-bit page buffer entries (PBEs) that fit in _page_size bytes.
 * The argument is parenthesized so an expression such as (a + b) is divided
 * as a whole rather than only its last term.
 */
#define NUM_PBES_ON_PAGE(_page_size) ((_page_size) / sizeof(u64))
/* PBE capacity of one maximum-size PBL page, and of a two-layer PBL built
 * from such pages.
 */
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
527 
528 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
529 					   struct qedr_pbl_info *pbl_info,
530 					   gfp_t flags)
531 {
532 	struct pci_dev *pdev = dev->pdev;
533 	struct qedr_pbl *pbl_table;
534 	dma_addr_t *pbl_main_tbl;
535 	dma_addr_t pa;
536 	void *va;
537 	int i;
538 
539 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
540 	if (!pbl_table)
541 		return ERR_PTR(-ENOMEM);
542 
543 	for (i = 0; i < pbl_info->num_pbls; i++) {
544 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
545 					flags);
546 		if (!va)
547 			goto err;
548 
549 		pbl_table[i].va = va;
550 		pbl_table[i].pa = pa;
551 	}
552 
553 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
554 	 * the first one with physical pointers to all of the rest
555 	 */
556 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
557 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
558 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
559 
560 	return pbl_table;
561 
562 err:
563 	for (i--; i >= 0; i--)
564 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
565 				  pbl_table[i].va, pbl_table[i].pa);
566 
567 	qedr_free_pbl(dev, pbl_info, pbl_table);
568 
569 	return ERR_PTR(-ENOMEM);
570 }
571 
572 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
573 				struct qedr_pbl_info *pbl_info,
574 				u32 num_pbes, int two_layer_capable)
575 {
576 	u32 pbl_capacity;
577 	u32 pbl_size;
578 	u32 num_pbls;
579 
580 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
581 		if (num_pbes > MAX_PBES_TWO_LAYER) {
582 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
583 			       num_pbes);
584 			return -EINVAL;
585 		}
586 
587 		/* calculate required pbl page size */
588 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
589 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
590 			       NUM_PBES_ON_PAGE(pbl_size);
591 
592 		while (pbl_capacity < num_pbes) {
593 			pbl_size *= 2;
594 			pbl_capacity = pbl_size / sizeof(u64);
595 			pbl_capacity = pbl_capacity * pbl_capacity;
596 		}
597 
598 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
599 		num_pbls++;	/* One for the layer0 ( points to the pbls) */
600 		pbl_info->two_layered = true;
601 	} else {
602 		/* One layered PBL */
603 		num_pbls = 1;
604 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
605 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
606 		pbl_info->two_layered = false;
607 	}
608 
609 	pbl_info->num_pbls = num_pbls;
610 	pbl_info->pbl_size = pbl_size;
611 	pbl_info->num_pbes = num_pbes;
612 
613 	DP_DEBUG(dev, QEDR_MSG_MR,
614 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
615 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
616 
617 	return 0;
618 }
619 
620 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
621 			       struct qedr_pbl *pbl,
622 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
623 {
624 	int pbe_cnt, total_num_pbes = 0;
625 	struct qedr_pbl *pbl_tbl;
626 	struct ib_block_iter biter;
627 	struct regpair *pbe;
628 
629 	if (!pbl_info->num_pbes)
630 		return;
631 
632 	/* If we have a two layered pbl, the first pbl points to the rest
633 	 * of the pbls and the first entry lays on the second pbl in the table
634 	 */
635 	if (pbl_info->two_layered)
636 		pbl_tbl = &pbl[1];
637 	else
638 		pbl_tbl = pbl;
639 
640 	pbe = (struct regpair *)pbl_tbl->va;
641 	if (!pbe) {
642 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
643 		return;
644 	}
645 
646 	pbe_cnt = 0;
647 
648 	rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) {
649 		u64 pg_addr = rdma_block_iter_dma_address(&biter);
650 
651 		pbe->lo = cpu_to_le32(pg_addr);
652 		pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
653 
654 		pbe_cnt++;
655 		total_num_pbes++;
656 		pbe++;
657 
658 		if (total_num_pbes == pbl_info->num_pbes)
659 			return;
660 
661 		/* If the given pbl is full storing the pbes, move to next pbl.
662 		 */
663 		if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
664 			pbl_tbl++;
665 			pbe = (struct regpair *)pbl_tbl->va;
666 			pbe_cnt = 0;
667 		}
668 	}
669 }
670 
671 static int qedr_db_recovery_add(struct qedr_dev *dev,
672 				void __iomem *db_addr,
673 				void *db_data,
674 				enum qed_db_rec_width db_width,
675 				enum qed_db_rec_space db_space)
676 {
677 	if (!db_data) {
678 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
679 		return 0;
680 	}
681 
682 	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
683 						 db_width, db_space);
684 }
685 
686 static void qedr_db_recovery_del(struct qedr_dev *dev,
687 				 void __iomem *db_addr,
688 				 void *db_data)
689 {
690 	if (!db_data) {
691 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
692 		return;
693 	}
694 
695 	/* Ignore return code as there is not much we can do about it. Error
696 	 * log will be printed inside.
697 	 */
698 	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
699 }
700 
701 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
702 			      struct qedr_cq *cq, struct ib_udata *udata,
703 			      u32 db_offset)
704 {
705 	struct qedr_create_cq_uresp uresp;
706 	int rc;
707 
708 	memset(&uresp, 0, sizeof(uresp));
709 
710 	uresp.db_offset = db_offset;
711 	uresp.icid = cq->icid;
712 	if (cq->q.db_mmap_entry)
713 		uresp.db_rec_addr =
714 			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
715 
716 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
717 	if (rc)
718 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
719 
720 	return rc;
721 }
722 
723 static void consume_cqe(struct qedr_cq *cq)
724 {
725 	if (cq->latest_cqe == cq->toggle_cqe)
726 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
727 
728 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
729 }
730 
731 static inline int qedr_align_cq_entries(int entries)
732 {
733 	u64 size, aligned_size;
734 
735 	/* We allocate an extra entry that we don't report to the FW. */
736 	size = (entries + 1) * QEDR_CQE_SIZE;
737 	aligned_size = ALIGN(size, PAGE_SIZE);
738 
739 	return aligned_size / QEDR_CQE_SIZE;
740 }
741 
742 static int qedr_init_user_db_rec(struct ib_udata *udata,
743 				 struct qedr_dev *dev, struct qedr_userq *q,
744 				 bool requires_db_rec)
745 {
746 	struct qedr_ucontext *uctx =
747 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
748 					  ibucontext);
749 	struct qedr_user_mmap_entry *entry;
750 	int rc;
751 
752 	/* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */
753 	if (requires_db_rec == 0 || !uctx->db_rec)
754 		return 0;
755 
756 	/* Allocate a page for doorbell recovery, add to mmap */
757 	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
758 	if (!q->db_rec_data) {
759 		DP_ERR(dev, "get_zeroed_page failed\n");
760 		return -ENOMEM;
761 	}
762 
763 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
764 	if (!entry)
765 		goto err_free_db_data;
766 
767 	entry->address = q->db_rec_data;
768 	entry->length = PAGE_SIZE;
769 	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
770 	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
771 					 &entry->rdma_entry,
772 					 PAGE_SIZE);
773 	if (rc)
774 		goto err_free_entry;
775 
776 	q->db_mmap_entry = &entry->rdma_entry;
777 
778 	return 0;
779 
780 err_free_entry:
781 	kfree(entry);
782 
783 err_free_db_data:
784 	free_page((unsigned long)q->db_rec_data);
785 	q->db_rec_data = NULL;
786 	return -ENOMEM;
787 }
788 
789 static inline int qedr_init_user_queue(struct ib_udata *udata,
790 				       struct qedr_dev *dev,
791 				       struct qedr_userq *q, u64 buf_addr,
792 				       size_t buf_len, bool requires_db_rec,
793 				       int access,
794 				       int alloc_and_init)
795 {
796 	u32 fw_pages;
797 	int rc;
798 
799 	q->buf_addr = buf_addr;
800 	q->buf_len = buf_len;
801 	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
802 	if (IS_ERR(q->umem)) {
803 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
804 		       PTR_ERR(q->umem));
805 		return PTR_ERR(q->umem);
806 	}
807 
808 	fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
809 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
810 	if (rc)
811 		goto err0;
812 
813 	if (alloc_and_init) {
814 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
815 		if (IS_ERR(q->pbl_tbl)) {
816 			rc = PTR_ERR(q->pbl_tbl);
817 			goto err0;
818 		}
819 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
820 				   FW_PAGE_SHIFT);
821 	} else {
822 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
823 		if (!q->pbl_tbl) {
824 			rc = -ENOMEM;
825 			goto err0;
826 		}
827 	}
828 
829 	/* mmap the user address used to store doorbell data for recovery */
830 	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
831 
832 err0:
833 	ib_umem_release(q->umem);
834 	q->umem = NULL;
835 
836 	return rc;
837 }
838 
839 static inline void qedr_init_cq_params(struct qedr_cq *cq,
840 				       struct qedr_ucontext *ctx,
841 				       struct qedr_dev *dev, int vector,
842 				       int chain_entries, int page_cnt,
843 				       u64 pbl_ptr,
844 				       struct qed_rdma_create_cq_in_params
845 				       *params)
846 {
847 	memset(params, 0, sizeof(*params));
848 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
849 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
850 	params->cnq_id = vector;
851 	params->cq_size = chain_entries - 1;
852 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
853 	params->pbl_num_pages = page_cnt;
854 	params->pbl_ptr = pbl_ptr;
855 	params->pbl_two_level = 0;
856 }
857 
858 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
859 {
860 	cq->db.data.agg_flags = flags;
861 	cq->db.data.value = cpu_to_le32(cons);
862 	writeq(cq->db.raw, cq->db_addr);
863 }
864 
865 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
866 {
867 	struct qedr_cq *cq = get_qedr_cq(ibcq);
868 	unsigned long sflags;
869 	struct qedr_dev *dev;
870 
871 	dev = get_qedr_dev(ibcq->device);
872 
873 	if (cq->destroyed) {
874 		DP_ERR(dev,
875 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
876 		       cq, cq->icid);
877 		return -EINVAL;
878 	}
879 
880 
881 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
882 		return 0;
883 
884 	spin_lock_irqsave(&cq->cq_lock, sflags);
885 
886 	cq->arm_flags = 0;
887 
888 	if (flags & IB_CQ_SOLICITED)
889 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
890 
891 	if (flags & IB_CQ_NEXT_COMP)
892 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
893 
894 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
895 
896 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
897 
898 	return 0;
899 }
900 
901 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
902 		   struct ib_udata *udata)
903 {
904 	struct ib_device *ibdev = ibcq->device;
905 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
906 		udata, struct qedr_ucontext, ibucontext);
907 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
908 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
909 	struct qed_chain_init_params chain_params = {
910 		.mode		= QED_CHAIN_MODE_PBL,
911 		.intended_use	= QED_CHAIN_USE_TO_CONSUME,
912 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
913 		.elem_size	= sizeof(union rdma_cqe),
914 	};
915 	struct qedr_dev *dev = get_qedr_dev(ibdev);
916 	struct qed_rdma_create_cq_in_params params;
917 	struct qedr_create_cq_ureq ureq = {};
918 	int vector = attr->comp_vector;
919 	int entries = attr->cqe;
920 	struct qedr_cq *cq = get_qedr_cq(ibcq);
921 	int chain_entries;
922 	u32 db_offset;
923 	int page_cnt;
924 	u64 pbl_ptr;
925 	u16 icid;
926 	int rc;
927 
928 	DP_DEBUG(dev, QEDR_MSG_INIT,
929 		 "create_cq: called from %s. entries=%d, vector=%d\n",
930 		 udata ? "User Lib" : "Kernel", entries, vector);
931 
932 	if (attr->flags)
933 		return -EOPNOTSUPP;
934 
935 	if (entries > QEDR_MAX_CQES) {
936 		DP_ERR(dev,
937 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
938 		       entries, QEDR_MAX_CQES);
939 		return -EINVAL;
940 	}
941 
942 	chain_entries = qedr_align_cq_entries(entries);
943 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
944 	chain_params.num_elems = chain_entries;
945 
946 	/* calc db offset. user will add DPI base, kernel will add db addr */
947 	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
948 
949 	if (udata) {
950 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
951 							 udata->inlen))) {
952 			DP_ERR(dev,
953 			       "create cq: problem copying data from user space\n");
954 			goto err0;
955 		}
956 
957 		if (!ureq.len) {
958 			DP_ERR(dev,
959 			       "create cq: cannot create a cq with 0 entries\n");
960 			goto err0;
961 		}
962 
963 		cq->cq_type = QEDR_CQ_TYPE_USER;
964 
965 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
966 					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
967 					  1);
968 		if (rc)
969 			goto err0;
970 
971 		pbl_ptr = cq->q.pbl_tbl->pa;
972 		page_cnt = cq->q.pbl_info.num_pbes;
973 
974 		cq->ibcq.cqe = chain_entries;
975 		cq->q.db_addr = ctx->dpi_addr + db_offset;
976 	} else {
977 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
978 
979 		rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
980 						   &chain_params);
981 		if (rc)
982 			goto err0;
983 
984 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
985 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
986 		cq->ibcq.cqe = cq->pbl.capacity;
987 	}
988 
989 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
990 			    pbl_ptr, &params);
991 
992 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
993 	if (rc)
994 		goto err1;
995 
996 	cq->icid = icid;
997 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
998 	spin_lock_init(&cq->cq_lock);
999 
1000 	if (udata) {
1001 		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
1002 		if (rc)
1003 			goto err2;
1004 
1005 		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
1006 					  &cq->q.db_rec_data->db_data,
1007 					  DB_REC_WIDTH_64B,
1008 					  DB_REC_USER);
1009 		if (rc)
1010 			goto err2;
1011 
1012 	} else {
1013 		/* Generate doorbell address. */
1014 		cq->db.data.icid = cq->icid;
1015 		cq->db_addr = dev->db_addr + db_offset;
1016 		cq->db.data.params = DB_AGG_CMD_MAX <<
1017 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1018 
1019 		/* point to the very last element, passing it we will toggle */
1020 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1021 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1022 		cq->latest_cqe = NULL;
1023 		consume_cqe(cq);
1024 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1025 
1026 		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1027 					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
1028 		if (rc)
1029 			goto err2;
1030 	}
1031 
1032 	DP_DEBUG(dev, QEDR_MSG_CQ,
1033 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1034 		 cq->icid, cq, params.cq_size);
1035 
1036 	return 0;
1037 
1038 err2:
1039 	destroy_iparams.icid = cq->icid;
1040 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1041 				  &destroy_oparams);
1042 err1:
1043 	if (udata) {
1044 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1045 		ib_umem_release(cq->q.umem);
1046 		if (cq->q.db_mmap_entry)
1047 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1048 	} else {
1049 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1050 	}
1051 err0:
1052 	return -EINVAL;
1053 }
1054 
1055 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1056 {
1057 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1058 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1059 
1060 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1061 
1062 	return 0;
1063 }
1064 
1065 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1066 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1067 
1068 int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1069 {
1070 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1071 	struct qed_rdma_destroy_cq_out_params oparams;
1072 	struct qed_rdma_destroy_cq_in_params iparams;
1073 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1074 	int iter;
1075 
1076 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1077 
1078 	cq->destroyed = 1;
1079 
1080 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
1081 	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1082 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1083 		return 0;
1084 	}
1085 
1086 	iparams.icid = cq->icid;
1087 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1088 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1089 
1090 	if (udata) {
1091 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1092 		ib_umem_release(cq->q.umem);
1093 
1094 		if (cq->q.db_rec_data) {
1095 			qedr_db_recovery_del(dev, cq->q.db_addr,
1096 					     &cq->q.db_rec_data->db_data);
1097 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1098 		}
1099 	} else {
1100 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1101 	}
1102 
1103 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1104 	 * wait until all CNQ interrupts, if any, are received. This will always
1105 	 * happen and will always happen very fast. If not, then a serious error
1106 	 * has occured. That is why we can use a long delay.
1107 	 * We spin for a short time so we don’t lose time on context switching
1108 	 * in case all the completions are handled in that span. Otherwise
1109 	 * we sleep for a while and check again. Since the CNQ may be
1110 	 * associated with (only) the current CPU we use msleep to allow the
1111 	 * current CPU to be freed.
1112 	 * The CNQ notification is increased in qedr_irq_handler().
1113 	 */
1114 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1115 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1116 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1117 		iter--;
1118 	}
1119 
1120 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1121 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1122 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1123 		iter--;
1124 	}
1125 
1126 	/* Note that we don't need to have explicit code to wait for the
1127 	 * completion of the event handler because it is invoked from the EQ.
1128 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1129 	 * be certain that there's no event handler in process.
1130 	 */
1131 	return 0;
1132 }
1133 
1134 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1135 					  struct ib_qp_attr *attr,
1136 					  int attr_mask,
1137 					  struct qed_rdma_modify_qp_in_params
1138 					  *qp_params)
1139 {
1140 	const struct ib_gid_attr *gid_attr;
1141 	enum rdma_network_type nw_type;
1142 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1143 	u32 ipv4_addr;
1144 	int ret;
1145 	int i;
1146 
1147 	gid_attr = grh->sgid_attr;
1148 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1149 	if (ret)
1150 		return ret;
1151 
1152 	nw_type = rdma_gid_attr_network_type(gid_attr);
1153 	switch (nw_type) {
1154 	case RDMA_NETWORK_IPV6:
1155 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1156 		       sizeof(qp_params->sgid));
1157 		memcpy(&qp_params->dgid.bytes[0],
1158 		       &grh->dgid,
1159 		       sizeof(qp_params->dgid));
1160 		qp_params->roce_mode = ROCE_V2_IPV6;
1161 		SET_FIELD(qp_params->modify_flags,
1162 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1163 		break;
1164 	case RDMA_NETWORK_ROCE_V1:
1165 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1166 		       sizeof(qp_params->sgid));
1167 		memcpy(&qp_params->dgid.bytes[0],
1168 		       &grh->dgid,
1169 		       sizeof(qp_params->dgid));
1170 		qp_params->roce_mode = ROCE_V1;
1171 		break;
1172 	case RDMA_NETWORK_IPV4:
1173 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1174 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1175 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1176 		qp_params->sgid.ipv4_addr = ipv4_addr;
1177 		ipv4_addr =
1178 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1179 		qp_params->dgid.ipv4_addr = ipv4_addr;
1180 		SET_FIELD(qp_params->modify_flags,
1181 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1182 		qp_params->roce_mode = ROCE_V2_IPV4;
1183 		break;
1184 	default:
1185 		return -EINVAL;
1186 	}
1187 
1188 	for (i = 0; i < 4; i++) {
1189 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1190 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1191 	}
1192 
1193 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1194 		qp_params->vlan_id = 0;
1195 
1196 	return 0;
1197 }
1198 
1199 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1200 			       struct ib_qp_init_attr *attrs,
1201 			       struct ib_udata *udata)
1202 {
1203 	struct qedr_device_attr *qattr = &dev->attr;
1204 
1205 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1206 	if (attrs->qp_type != IB_QPT_RC &&
1207 	    attrs->qp_type != IB_QPT_GSI &&
1208 	    attrs->qp_type != IB_QPT_XRC_INI &&
1209 	    attrs->qp_type != IB_QPT_XRC_TGT) {
1210 		DP_DEBUG(dev, QEDR_MSG_QP,
1211 			 "create qp: unsupported qp type=0x%x requested\n",
1212 			 attrs->qp_type);
1213 		return -EOPNOTSUPP;
1214 	}
1215 
1216 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1217 		DP_ERR(dev,
1218 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1219 		       attrs->cap.max_send_wr, qattr->max_sqe);
1220 		return -EINVAL;
1221 	}
1222 
1223 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1224 		DP_ERR(dev,
1225 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1226 		       attrs->cap.max_inline_data, qattr->max_inline);
1227 		return -EINVAL;
1228 	}
1229 
1230 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1231 		DP_ERR(dev,
1232 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1233 		       attrs->cap.max_send_sge, qattr->max_sge);
1234 		return -EINVAL;
1235 	}
1236 
1237 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1238 		DP_ERR(dev,
1239 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1240 		       attrs->cap.max_recv_sge, qattr->max_sge);
1241 		return -EINVAL;
1242 	}
1243 
1244 	/* verify consumer QPs are not trying to use GSI QP's CQ.
1245 	 * TGT QP isn't associated with RQ/SQ
1246 	 */
1247 	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
1248 	    (attrs->qp_type != IB_QPT_XRC_TGT) &&
1249 	    (attrs->qp_type != IB_QPT_XRC_INI)) {
1250 		struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
1251 		struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
1252 
1253 		if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) ||
1254 		    (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) {
1255 			DP_ERR(dev,
1256 			       "create qp: consumer QP cannot use GSI CQs.\n");
1257 			return -EINVAL;
1258 		}
1259 	}
1260 
1261 	return 0;
1262 }
1263 
1264 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1265 			       struct qedr_srq *srq, struct ib_udata *udata)
1266 {
1267 	struct qedr_create_srq_uresp uresp = {};
1268 	int rc;
1269 
1270 	uresp.srq_id = srq->srq_id;
1271 
1272 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1273 	if (rc)
1274 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1275 
1276 	return rc;
1277 }
1278 
1279 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1280 			       struct qedr_create_qp_uresp *uresp,
1281 			       struct qedr_qp *qp)
1282 {
1283 	/* iWARP requires two doorbells per RQ. */
1284 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1285 		uresp->rq_db_offset =
1286 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1287 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1288 	} else {
1289 		uresp->rq_db_offset =
1290 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1291 	}
1292 
1293 	uresp->rq_icid = qp->icid;
1294 	if (qp->urq.db_mmap_entry)
1295 		uresp->rq_db_rec_addr =
1296 			rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
1297 }
1298 
1299 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1300 			       struct qedr_create_qp_uresp *uresp,
1301 			       struct qedr_qp *qp)
1302 {
1303 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1304 
1305 	/* iWARP uses the same cid for rq and sq */
1306 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1307 		uresp->sq_icid = qp->icid;
1308 	else
1309 		uresp->sq_icid = qp->icid + 1;
1310 
1311 	if (qp->usq.db_mmap_entry)
1312 		uresp->sq_db_rec_addr =
1313 			rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
1314 }
1315 
1316 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1317 			      struct qedr_qp *qp, struct ib_udata *udata,
1318 			      struct qedr_create_qp_uresp *uresp)
1319 {
1320 	int rc;
1321 
1322 	memset(uresp, 0, sizeof(*uresp));
1323 
1324 	if (qedr_qp_has_sq(qp))
1325 		qedr_copy_sq_uresp(dev, uresp, qp);
1326 
1327 	if (qedr_qp_has_rq(qp))
1328 		qedr_copy_rq_uresp(dev, uresp, qp);
1329 
1330 	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1331 	uresp->qp_id = qp->qp_id;
1332 
1333 	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1334 	if (rc)
1335 		DP_ERR(dev,
1336 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1337 		       qp->icid);
1338 
1339 	return rc;
1340 }
1341 
1342 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1343 {
1344 	qed_chain_reset(&qph->pbl);
1345 	qph->prod = 0;
1346 	qph->cons = 0;
1347 	qph->wqe_cons = 0;
1348 	qph->db_data.data.value = cpu_to_le16(0);
1349 }
1350 
1351 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1352 				      struct qedr_qp *qp,
1353 				      struct qedr_pd *pd,
1354 				      struct ib_qp_init_attr *attrs)
1355 {
1356 	spin_lock_init(&qp->q_lock);
1357 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1358 		kref_init(&qp->refcnt);
1359 		init_completion(&qp->iwarp_cm_comp);
1360 	}
1361 
1362 	qp->pd = pd;
1363 	qp->qp_type = attrs->qp_type;
1364 	qp->max_inline_data = attrs->cap.max_inline_data;
1365 	qp->state = QED_ROCE_QP_STATE_RESET;
1366 
1367 	qp->prev_wqe_size = 0;
1368 
1369 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1370 	qp->dev = dev;
1371 	if (qedr_qp_has_sq(qp)) {
1372 		qedr_reset_qp_hwq_info(&qp->sq);
1373 		qp->sq.max_sges = attrs->cap.max_send_sge;
1374 		qp->sq_cq = get_qedr_cq(attrs->send_cq);
1375 		DP_DEBUG(dev, QEDR_MSG_QP,
1376 			 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1377 			 qp->sq.max_sges, qp->sq_cq->icid);
1378 	}
1379 
1380 	if (attrs->srq)
1381 		qp->srq = get_qedr_srq(attrs->srq);
1382 
1383 	if (qedr_qp_has_rq(qp)) {
1384 		qedr_reset_qp_hwq_info(&qp->rq);
1385 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1386 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1387 		DP_DEBUG(dev, QEDR_MSG_QP,
1388 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1389 			 qp->rq.max_sges, qp->rq_cq->icid);
1390 	}
1391 
1392 	DP_DEBUG(dev, QEDR_MSG_QP,
1393 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1394 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1395 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1396 	DP_DEBUG(dev, QEDR_MSG_QP,
1397 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1398 		 qp->sq.max_sges, qp->sq_cq->icid);
1399 }
1400 
1401 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1402 {
1403 	int rc = 0;
1404 
1405 	if (qedr_qp_has_sq(qp)) {
1406 		qp->sq.db = dev->db_addr +
1407 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1408 		qp->sq.db_data.data.icid = qp->icid + 1;
1409 		rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data,
1410 					  DB_REC_WIDTH_32B, DB_REC_KERNEL);
1411 		if (rc)
1412 			return rc;
1413 	}
1414 
1415 	if (qedr_qp_has_rq(qp)) {
1416 		qp->rq.db = dev->db_addr +
1417 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1418 		qp->rq.db_data.data.icid = qp->icid;
1419 		rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data,
1420 					  DB_REC_WIDTH_32B, DB_REC_KERNEL);
1421 		if (rc && qedr_qp_has_sq(qp))
1422 			qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
1423 	}
1424 
1425 	return rc;
1426 }
1427 
1428 static int qedr_check_srq_params(struct qedr_dev *dev,
1429 				 struct ib_srq_init_attr *attrs,
1430 				 struct ib_udata *udata)
1431 {
1432 	struct qedr_device_attr *qattr = &dev->attr;
1433 
1434 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1435 		DP_ERR(dev,
1436 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1437 		       attrs->attr.max_wr, qattr->max_srq_wr);
1438 		return -EINVAL;
1439 	}
1440 
1441 	if (attrs->attr.max_sge > qattr->max_sge) {
1442 		DP_ERR(dev,
1443 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1444 		       attrs->attr.max_sge, qattr->max_sge);
1445 	}
1446 
1447 	if (!udata && attrs->srq_type == IB_SRQT_XRC) {
1448 		DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n");
1449 		return -EINVAL;
1450 	}
1451 
1452 	return 0;
1453 }
1454 
1455 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1456 {
1457 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1458 	ib_umem_release(srq->usrq.umem);
1459 	ib_umem_release(srq->prod_umem);
1460 }
1461 
1462 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1463 {
1464 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1465 	struct qedr_dev *dev = srq->dev;
1466 
1467 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1468 
1469 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1470 			  hw_srq->virt_prod_pair_addr,
1471 			  hw_srq->phy_prod_pair_addr);
1472 }
1473 
1474 static int qedr_init_srq_user_params(struct ib_udata *udata,
1475 				     struct qedr_srq *srq,
1476 				     struct qedr_create_srq_ureq *ureq,
1477 				     int access)
1478 {
1479 	struct scatterlist *sg;
1480 	int rc;
1481 
1482 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1483 				  ureq->srq_len, false, access, 1);
1484 	if (rc)
1485 		return rc;
1486 
1487 	srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
1488 				     sizeof(struct rdma_srq_producers), access);
1489 	if (IS_ERR(srq->prod_umem)) {
1490 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1491 		ib_umem_release(srq->usrq.umem);
1492 		DP_ERR(srq->dev,
1493 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1494 		       PTR_ERR(srq->prod_umem));
1495 		return PTR_ERR(srq->prod_umem);
1496 	}
1497 
1498 	sg = srq->prod_umem->sgt_append.sgt.sgl;
1499 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1500 
1501 	return 0;
1502 }
1503 
1504 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1505 					struct qedr_dev *dev,
1506 					struct ib_srq_init_attr *init_attr)
1507 {
1508 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1509 	struct qed_chain_init_params params = {
1510 		.mode		= QED_CHAIN_MODE_PBL,
1511 		.intended_use	= QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1512 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1513 		.elem_size	= QEDR_SRQ_WQE_ELEM_SIZE,
1514 	};
1515 	dma_addr_t phy_prod_pair_addr;
1516 	u32 num_elems;
1517 	void *va;
1518 	int rc;
1519 
1520 	va = dma_alloc_coherent(&dev->pdev->dev,
1521 				sizeof(struct rdma_srq_producers),
1522 				&phy_prod_pair_addr, GFP_KERNEL);
1523 	if (!va) {
1524 		DP_ERR(dev,
1525 		       "create srq: failed to allocate dma memory for producer\n");
1526 		return -ENOMEM;
1527 	}
1528 
1529 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1530 	hw_srq->virt_prod_pair_addr = va;
1531 
1532 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1533 	params.num_elems = num_elems;
1534 
1535 	rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
1536 	if (rc)
1537 		goto err0;
1538 
1539 	hw_srq->num_elems = num_elems;
1540 
1541 	return 0;
1542 
1543 err0:
1544 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1545 			  va, phy_prod_pair_addr);
1546 	return rc;
1547 }
1548 
1549 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1550 		    struct ib_udata *udata)
1551 {
1552 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1553 	struct qed_rdma_create_srq_in_params in_params = {};
1554 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1555 	struct qed_rdma_create_srq_out_params out_params;
1556 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1557 	struct qedr_create_srq_ureq ureq = {};
1558 	u64 pbl_base_addr, phy_prod_pair_addr;
1559 	struct qedr_srq_hwq_info *hw_srq;
1560 	u32 page_cnt, page_size;
1561 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1562 	int rc = 0;
1563 
1564 	DP_DEBUG(dev, QEDR_MSG_QP,
1565 		 "create SRQ called from %s (pd %p)\n",
1566 		 (udata) ? "User lib" : "kernel", pd);
1567 
1568 	if (init_attr->srq_type != IB_SRQT_BASIC &&
1569 	    init_attr->srq_type != IB_SRQT_XRC)
1570 		return -EOPNOTSUPP;
1571 
1572 	rc = qedr_check_srq_params(dev, init_attr, udata);
1573 	if (rc)
1574 		return -EINVAL;
1575 
1576 	srq->dev = dev;
1577 	srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
1578 	hw_srq = &srq->hw_srq;
1579 	spin_lock_init(&srq->lock);
1580 
1581 	hw_srq->max_wr = init_attr->attr.max_wr;
1582 	hw_srq->max_sges = init_attr->attr.max_sge;
1583 
1584 	if (udata) {
1585 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1586 							 udata->inlen))) {
1587 			DP_ERR(dev,
1588 			       "create srq: problem copying data from user space\n");
1589 			goto err0;
1590 		}
1591 
1592 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
1593 		if (rc)
1594 			goto err0;
1595 
1596 		page_cnt = srq->usrq.pbl_info.num_pbes;
1597 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1598 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1599 		page_size = PAGE_SIZE;
1600 	} else {
1601 		struct qed_chain *pbl;
1602 
1603 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1604 		if (rc)
1605 			goto err0;
1606 
1607 		pbl = &hw_srq->pbl;
1608 		page_cnt = qed_chain_get_page_cnt(pbl);
1609 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1610 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1611 		page_size = QED_CHAIN_PAGE_SIZE;
1612 	}
1613 
1614 	in_params.pd_id = pd->pd_id;
1615 	in_params.pbl_base_addr = pbl_base_addr;
1616 	in_params.prod_pair_addr = phy_prod_pair_addr;
1617 	in_params.num_pages = page_cnt;
1618 	in_params.page_size = page_size;
1619 	if (srq->is_xrc) {
1620 		struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd);
1621 		struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq);
1622 
1623 		in_params.is_xrc = 1;
1624 		in_params.xrcd_id = xrcd->xrcd_id;
1625 		in_params.cq_cid = cq->icid;
1626 	}
1627 
1628 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1629 	if (rc)
1630 		goto err1;
1631 
1632 	srq->srq_id = out_params.srq_id;
1633 
1634 	if (udata) {
1635 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1636 		if (rc)
1637 			goto err2;
1638 	}
1639 
1640 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1641 	if (rc)
1642 		goto err2;
1643 
1644 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1645 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1646 	return 0;
1647 
1648 err2:
1649 	destroy_in_params.srq_id = srq->srq_id;
1650 
1651 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1652 err1:
1653 	if (udata)
1654 		qedr_free_srq_user_params(srq);
1655 	else
1656 		qedr_free_srq_kernel_params(srq);
1657 err0:
1658 	return -EFAULT;
1659 }
1660 
1661 int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1662 {
1663 	struct qed_rdma_destroy_srq_in_params in_params = {};
1664 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1665 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1666 
1667 	xa_erase_irq(&dev->srqs, srq->srq_id);
1668 	in_params.srq_id = srq->srq_id;
1669 	in_params.is_xrc = srq->is_xrc;
1670 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1671 
1672 	if (ibsrq->uobject)
1673 		qedr_free_srq_user_params(srq);
1674 	else
1675 		qedr_free_srq_kernel_params(srq);
1676 
1677 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1678 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1679 		 srq->srq_id);
1680 	return 0;
1681 }
1682 
1683 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1684 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1685 {
1686 	struct qed_rdma_modify_srq_in_params in_params = {};
1687 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1688 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1689 	int rc;
1690 
1691 	if (attr_mask & IB_SRQ_MAX_WR) {
1692 		DP_ERR(dev,
1693 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1694 		       attr_mask, srq);
1695 		return -EINVAL;
1696 	}
1697 
1698 	if (attr_mask & IB_SRQ_LIMIT) {
1699 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1700 			DP_ERR(dev,
1701 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1702 			       attr->srq_limit, srq->hw_srq.max_wr);
1703 			return -EINVAL;
1704 		}
1705 
1706 		in_params.srq_id = srq->srq_id;
1707 		in_params.wqe_limit = attr->srq_limit;
1708 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1709 		if (rc)
1710 			return rc;
1711 	}
1712 
1713 	srq->srq_limit = attr->srq_limit;
1714 
1715 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1716 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1717 
1718 	return 0;
1719 }
1720 
1721 static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type)
1722 {
1723 	switch (ib_qp_type) {
1724 	case IB_QPT_RC:
1725 		return QED_RDMA_QP_TYPE_RC;
1726 	case IB_QPT_XRC_INI:
1727 		return QED_RDMA_QP_TYPE_XRC_INI;
1728 	case IB_QPT_XRC_TGT:
1729 		return QED_RDMA_QP_TYPE_XRC_TGT;
1730 	default:
1731 		return QED_RDMA_QP_TYPE_INVAL;
1732 	}
1733 }
1734 
1735 static inline void
1736 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1737 			      struct qedr_pd *pd,
1738 			      struct qedr_qp *qp,
1739 			      struct ib_qp_init_attr *attrs,
1740 			      bool fmr_and_reserved_lkey,
1741 			      struct qed_rdma_create_qp_in_params *params)
1742 {
1743 	/* QP handle to be written in an async event */
1744 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1745 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1746 
1747 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1748 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1749 	params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type);
1750 	params->stats_queue = 0;
1751 
1752 	if (pd) {
1753 		params->pd = pd->pd_id;
1754 		params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1755 	}
1756 
1757 	if (qedr_qp_has_sq(qp))
1758 		params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1759 
1760 	if (qedr_qp_has_rq(qp))
1761 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1762 
1763 	if (qedr_qp_has_srq(qp)) {
1764 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1765 		params->srq_id = qp->srq->srq_id;
1766 		params->use_srq = true;
1767 	} else {
1768 		params->srq_id = 0;
1769 		params->use_srq = false;
1770 	}
1771 }
1772 
1773 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1774 {
1775 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1776 		 "qp=%p. "
1777 		 "sq_addr=0x%llx, "
1778 		 "sq_len=%zd, "
1779 		 "rq_addr=0x%llx, "
1780 		 "rq_len=%zd"
1781 		 "\n",
1782 		 qp,
1783 		 qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0,
1784 		 qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0,
1785 		 qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0,
1786 		 qedr_qp_has_sq(qp) ? qp->urq.buf_len : 0);
1787 }
1788 
1789 static inline void
1790 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1791 			    struct qedr_qp *qp,
1792 			    struct qed_rdma_create_qp_out_params *out_params)
1793 {
1794 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1795 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1796 
1797 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1798 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1799 	if (!qp->srq) {
1800 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1801 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1802 	}
1803 
1804 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1805 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1806 }
1807 
1808 static void qedr_cleanup_user(struct qedr_dev *dev,
1809 			      struct qedr_ucontext *ctx,
1810 			      struct qedr_qp *qp)
1811 {
1812 	if (qedr_qp_has_sq(qp)) {
1813 		ib_umem_release(qp->usq.umem);
1814 		qp->usq.umem = NULL;
1815 	}
1816 
1817 	if (qedr_qp_has_rq(qp)) {
1818 		ib_umem_release(qp->urq.umem);
1819 		qp->urq.umem = NULL;
1820 	}
1821 
1822 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1823 		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1824 		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1825 	} else {
1826 		kfree(qp->usq.pbl_tbl);
1827 		kfree(qp->urq.pbl_tbl);
1828 	}
1829 
1830 	if (qp->usq.db_rec_data) {
1831 		qedr_db_recovery_del(dev, qp->usq.db_addr,
1832 				     &qp->usq.db_rec_data->db_data);
1833 		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1834 	}
1835 
1836 	if (qp->urq.db_rec_data) {
1837 		qedr_db_recovery_del(dev, qp->urq.db_addr,
1838 				     &qp->urq.db_rec_data->db_data);
1839 		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
1840 	}
1841 
1842 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1843 		qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
1844 				     &qp->urq.db_rec_db2_data);
1845 }
1846 
1847 static int qedr_create_user_qp(struct qedr_dev *dev,
1848 			       struct qedr_qp *qp,
1849 			       struct ib_pd *ibpd,
1850 			       struct ib_udata *udata,
1851 			       struct ib_qp_init_attr *attrs)
1852 {
1853 	struct qed_rdma_create_qp_in_params in_params;
1854 	struct qed_rdma_create_qp_out_params out_params;
1855 	struct qedr_create_qp_uresp uresp = {};
1856 	struct qedr_create_qp_ureq ureq = {};
1857 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1858 	struct qedr_ucontext *ctx = NULL;
1859 	struct qedr_pd *pd = NULL;
1860 	int rc = 0;
1861 
1862 	qp->create_type = QEDR_QP_CREATE_USER;
1863 
1864 	if (ibpd) {
1865 		pd = get_qedr_pd(ibpd);
1866 		ctx = pd->uctx;
1867 	}
1868 
1869 	if (udata) {
1870 		rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1871 					udata->inlen));
1872 		if (rc) {
1873 			DP_ERR(dev, "Problem copying data from user space\n");
1874 			return rc;
1875 		}
1876 	}
1877 
1878 	if (qedr_qp_has_sq(qp)) {
1879 		/* SQ - read access only (0) */
1880 		rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1881 					  ureq.sq_len, true, 0, alloc_and_init);
1882 		if (rc)
1883 			return rc;
1884 	}
1885 
1886 	if (qedr_qp_has_rq(qp)) {
1887 		/* RQ - read access only (0) */
1888 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1889 					  ureq.rq_len, true, 0, alloc_and_init);
1890 		if (rc)
1891 			return rc;
1892 	}
1893 
1894 	memset(&in_params, 0, sizeof(in_params));
1895 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1896 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1897 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1898 
1899 	if (qp->qp_type == IB_QPT_XRC_TGT) {
1900 		struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd);
1901 
1902 		in_params.xrcd_id = xrcd->xrcd_id;
1903 		in_params.qp_handle_lo = qp->qp_id;
1904 		in_params.use_srq = 1;
1905 	}
1906 
1907 	if (qedr_qp_has_sq(qp)) {
1908 		in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1909 		in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1910 	}
1911 
1912 	if (qedr_qp_has_rq(qp)) {
1913 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1914 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1915 	}
1916 
1917 	if (ctx)
1918 		SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
1919 
1920 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1921 					      &in_params, &out_params);
1922 
1923 	if (!qp->qed_qp) {
1924 		rc = -ENOMEM;
1925 		goto err1;
1926 	}
1927 
1928 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1929 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1930 
1931 	qp->qp_id = out_params.qp_id;
1932 	qp->icid = out_params.icid;
1933 
1934 	if (udata) {
1935 		rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1936 		if (rc)
1937 			goto err;
1938 	}
1939 
1940 	/* db offset was calculated in copy_qp_uresp, now set in the user q */
1941 	if (qedr_qp_has_sq(qp)) {
1942 		qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1943 		rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1944 					  &qp->usq.db_rec_data->db_data,
1945 					  DB_REC_WIDTH_32B,
1946 					  DB_REC_USER);
1947 		if (rc)
1948 			goto err;
1949 	}
1950 
1951 	if (qedr_qp_has_rq(qp)) {
1952 		qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1953 		rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1954 					  &qp->urq.db_rec_data->db_data,
1955 					  DB_REC_WIDTH_32B,
1956 					  DB_REC_USER);
1957 		if (rc)
1958 			goto err;
1959 	}
1960 
1961 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1962 		qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
1963 
1964 		/* calculate the db_rec_db2 data since it is constant so no
1965 		 * need to reflect from user
1966 		 */
1967 		qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
1968 		qp->urq.db_rec_db2_data.data.value =
1969 			cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
1970 
1971 		rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1972 					  &qp->urq.db_rec_db2_data,
1973 					  DB_REC_WIDTH_32B,
1974 					  DB_REC_USER);
1975 		if (rc)
1976 			goto err;
1977 	}
1978 	qedr_qp_user_print(dev, qp);
1979 	return rc;
1980 err:
1981 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1982 	if (rc)
1983 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1984 
1985 err1:
1986 	qedr_cleanup_user(dev, ctx, qp);
1987 	return rc;
1988 }
1989 
1990 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1991 {
1992 	int rc;
1993 
1994 	qp->sq.db = dev->db_addr +
1995 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1996 	qp->sq.db_data.data.icid = qp->icid;
1997 
1998 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1999 				  &qp->sq.db_data,
2000 				  DB_REC_WIDTH_32B,
2001 				  DB_REC_KERNEL);
2002 	if (rc)
2003 		return rc;
2004 
2005 	qp->rq.db = dev->db_addr +
2006 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
2007 	qp->rq.db_data.data.icid = qp->icid;
2008 	qp->rq.iwarp_db2 = dev->db_addr +
2009 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
2010 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
2011 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
2012 
2013 	rc = qedr_db_recovery_add(dev, qp->rq.db,
2014 				  &qp->rq.db_data,
2015 				  DB_REC_WIDTH_32B,
2016 				  DB_REC_KERNEL);
2017 	if (rc)
2018 		return rc;
2019 
2020 	rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
2021 				  &qp->rq.iwarp_db2_data,
2022 				  DB_REC_WIDTH_32B,
2023 				  DB_REC_KERNEL);
2024 	return rc;
2025 }
2026 
2027 static int
2028 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
2029 			   struct qedr_qp *qp,
2030 			   struct qed_rdma_create_qp_in_params *in_params,
2031 			   u32 n_sq_elems, u32 n_rq_elems)
2032 {
2033 	struct qed_rdma_create_qp_out_params out_params;
2034 	struct qed_chain_init_params params = {
2035 		.mode		= QED_CHAIN_MODE_PBL,
2036 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
2037 	};
2038 	int rc;
2039 
2040 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
2041 	params.num_elems = n_sq_elems;
2042 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
2043 
2044 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
2045 	if (rc)
2046 		return rc;
2047 
2048 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
2049 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
2050 
2051 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
2052 	params.num_elems = n_rq_elems;
2053 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
2054 
2055 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2056 	if (rc)
2057 		return rc;
2058 
2059 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
2060 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
2061 
2062 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
2063 					      in_params, &out_params);
2064 
2065 	if (!qp->qed_qp)
2066 		return -EINVAL;
2067 
2068 	qp->qp_id = out_params.qp_id;
2069 	qp->icid = out_params.icid;
2070 
2071 	return qedr_set_roce_db_info(dev, qp);
2072 }
2073 
2074 static int
2075 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
2076 			    struct qedr_qp *qp,
2077 			    struct qed_rdma_create_qp_in_params *in_params,
2078 			    u32 n_sq_elems, u32 n_rq_elems)
2079 {
2080 	struct qed_rdma_create_qp_out_params out_params;
2081 	struct qed_chain_init_params params = {
2082 		.mode		= QED_CHAIN_MODE_PBL,
2083 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
2084 	};
2085 	int rc;
2086 
2087 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
2088 						     QEDR_SQE_ELEMENT_SIZE,
2089 						     QED_CHAIN_PAGE_SIZE,
2090 						     QED_CHAIN_MODE_PBL);
2091 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
2092 						     QEDR_RQE_ELEMENT_SIZE,
2093 						     QED_CHAIN_PAGE_SIZE,
2094 						     QED_CHAIN_MODE_PBL);
2095 
2096 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
2097 					      in_params, &out_params);
2098 
2099 	if (!qp->qed_qp)
2100 		return -EINVAL;
2101 
2102 	/* Now we allocate the chain */
2103 
2104 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
2105 	params.num_elems = n_sq_elems;
2106 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
2107 	params.ext_pbl_virt = out_params.sq_pbl_virt;
2108 	params.ext_pbl_phys = out_params.sq_pbl_phys;
2109 
2110 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
2111 	if (rc)
2112 		goto err;
2113 
2114 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
2115 	params.num_elems = n_rq_elems;
2116 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
2117 	params.ext_pbl_virt = out_params.rq_pbl_virt;
2118 	params.ext_pbl_phys = out_params.rq_pbl_phys;
2119 
2120 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2121 	if (rc)
2122 		goto err;
2123 
2124 	qp->qp_id = out_params.qp_id;
2125 	qp->icid = out_params.icid;
2126 
2127 	return qedr_set_iwarp_db_info(dev, qp);
2128 
2129 err:
2130 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2131 
2132 	return rc;
2133 }
2134 
2135 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2136 {
2137 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2138 	kfree(qp->wqe_wr_id);
2139 
2140 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2141 	kfree(qp->rqe_wr_id);
2142 
2143 	/* GSI qp is not registered to db mechanism so no need to delete */
2144 	if (qp->qp_type == IB_QPT_GSI)
2145 		return;
2146 
2147 	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2148 
2149 	if (!qp->srq) {
2150 		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2151 
2152 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2153 			qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2154 					     &qp->rq.iwarp_db2_data);
2155 	}
2156 }
2157 
2158 static int qedr_create_kernel_qp(struct qedr_dev *dev,
2159 				 struct qedr_qp *qp,
2160 				 struct ib_pd *ibpd,
2161 				 struct ib_qp_init_attr *attrs)
2162 {
2163 	struct qed_rdma_create_qp_in_params in_params;
2164 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2165 	int rc = -EINVAL;
2166 	u32 n_rq_elems;
2167 	u32 n_sq_elems;
2168 	u32 n_sq_entries;
2169 
2170 	memset(&in_params, 0, sizeof(in_params));
2171 	qp->create_type = QEDR_QP_CREATE_KERNEL;
2172 
2173 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2174 	 * the ring. The ring should allow at least a single WR, even if the
2175 	 * user requested none, due to allocation issues.
2176 	 * We should add an extra WR since the prod and cons indices of
2177 	 * wqe_wr_id are managed in such a way that the WQ is considered full
2178 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
2179 	 * double the number of entries due an iSER issue that pushes far more
2180 	 * WRs than indicated. If we decline its ib_post_send() then we get
2181 	 * error prints in the dmesg we'd like to avoid.
2182 	 */
2183 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2184 			      dev->attr.max_sqe);
2185 
2186 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2187 				GFP_KERNEL);
2188 	if (!qp->wqe_wr_id) {
2189 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2190 		return -ENOMEM;
2191 	}
2192 
2193 	/* QP handle to be written in CQE */
2194 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2195 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2196 
2197 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2198 	 * the ring. There ring should allow at least a single WR, even if the
2199 	 * user requested none, due to allocation issues.
2200 	 */
2201 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2202 
2203 	/* Allocate driver internal RQ array */
2204 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2205 				GFP_KERNEL);
2206 	if (!qp->rqe_wr_id) {
2207 		DP_ERR(dev,
2208 		       "create qp: failed RQ shadow memory allocation\n");
2209 		kfree(qp->wqe_wr_id);
2210 		return -ENOMEM;
2211 	}
2212 
2213 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2214 
2215 	n_sq_entries = attrs->cap.max_send_wr;
2216 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2217 	n_sq_entries = max_t(u32, n_sq_entries, 1);
2218 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2219 
2220 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2221 
2222 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2223 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2224 						 n_sq_elems, n_rq_elems);
2225 	else
2226 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2227 						n_sq_elems, n_rq_elems);
2228 	if (rc)
2229 		qedr_cleanup_kernel(dev, qp);
2230 
2231 	return rc;
2232 }
2233 
2234 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2235 				  struct ib_udata *udata)
2236 {
2237 	struct qedr_ucontext *ctx =
2238 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2239 					  ibucontext);
2240 	int rc;
2241 
2242 	if (qp->qp_type != IB_QPT_GSI) {
2243 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2244 		if (rc)
2245 			return rc;
2246 	}
2247 
2248 	if (qp->create_type == QEDR_QP_CREATE_USER)
2249 		qedr_cleanup_user(dev, ctx, qp);
2250 	else
2251 		qedr_cleanup_kernel(dev, qp);
2252 
2253 	return 0;
2254 }
2255 
2256 int qedr_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
2257 		   struct ib_udata *udata)
2258 {
2259 	struct qedr_xrcd *xrcd = NULL;
2260 	struct ib_pd *ibpd = ibqp->pd;
2261 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2262 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2263 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2264 	int rc = 0;
2265 
2266 	if (attrs->create_flags)
2267 		return -EOPNOTSUPP;
2268 
2269 	if (attrs->qp_type == IB_QPT_XRC_TGT)
2270 		xrcd = get_qedr_xrcd(attrs->xrcd);
2271 	else
2272 		pd = get_qedr_pd(ibpd);
2273 
2274 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2275 		 udata ? "user library" : "kernel", pd);
2276 
2277 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2278 	if (rc)
2279 		return rc;
2280 
2281 	DP_DEBUG(dev, QEDR_MSG_QP,
2282 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2283 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2284 		 get_qedr_cq(attrs->send_cq),
2285 		 get_qedr_cq(attrs->send_cq)->icid,
2286 		 get_qedr_cq(attrs->recv_cq),
2287 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2288 
2289 	qedr_set_common_qp_params(dev, qp, pd, attrs);
2290 
2291 	if (attrs->qp_type == IB_QPT_GSI)
2292 		return qedr_create_gsi_qp(dev, attrs, qp);
2293 
2294 	if (udata || xrcd)
2295 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2296 	else
2297 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2298 
2299 	if (rc)
2300 		return rc;
2301 
2302 	qp->ibqp.qp_num = qp->qp_id;
2303 
2304 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2305 		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2306 		if (rc)
2307 			goto out_free_qp_resources;
2308 	}
2309 
2310 	return 0;
2311 
2312 out_free_qp_resources:
2313 	qedr_free_qp_resources(dev, qp, udata);
2314 	return -EFAULT;
2315 }
2316 
2317 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2318 {
2319 	switch (qp_state) {
2320 	case QED_ROCE_QP_STATE_RESET:
2321 		return IB_QPS_RESET;
2322 	case QED_ROCE_QP_STATE_INIT:
2323 		return IB_QPS_INIT;
2324 	case QED_ROCE_QP_STATE_RTR:
2325 		return IB_QPS_RTR;
2326 	case QED_ROCE_QP_STATE_RTS:
2327 		return IB_QPS_RTS;
2328 	case QED_ROCE_QP_STATE_SQD:
2329 		return IB_QPS_SQD;
2330 	case QED_ROCE_QP_STATE_ERR:
2331 		return IB_QPS_ERR;
2332 	case QED_ROCE_QP_STATE_SQE:
2333 		return IB_QPS_SQE;
2334 	}
2335 	return IB_QPS_ERR;
2336 }
2337 
2338 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2339 					enum ib_qp_state qp_state)
2340 {
2341 	switch (qp_state) {
2342 	case IB_QPS_RESET:
2343 		return QED_ROCE_QP_STATE_RESET;
2344 	case IB_QPS_INIT:
2345 		return QED_ROCE_QP_STATE_INIT;
2346 	case IB_QPS_RTR:
2347 		return QED_ROCE_QP_STATE_RTR;
2348 	case IB_QPS_RTS:
2349 		return QED_ROCE_QP_STATE_RTS;
2350 	case IB_QPS_SQD:
2351 		return QED_ROCE_QP_STATE_SQD;
2352 	case IB_QPS_ERR:
2353 		return QED_ROCE_QP_STATE_ERR;
2354 	default:
2355 		return QED_ROCE_QP_STATE_ERR;
2356 	}
2357 }
2358 
2359 static int qedr_update_qp_state(struct qedr_dev *dev,
2360 				struct qedr_qp *qp,
2361 				enum qed_roce_qp_state cur_state,
2362 				enum qed_roce_qp_state new_state)
2363 {
2364 	int status = 0;
2365 
2366 	if (new_state == cur_state)
2367 		return 0;
2368 
2369 	switch (cur_state) {
2370 	case QED_ROCE_QP_STATE_RESET:
2371 		switch (new_state) {
2372 		case QED_ROCE_QP_STATE_INIT:
2373 			break;
2374 		default:
2375 			status = -EINVAL;
2376 			break;
2377 		}
2378 		break;
2379 	case QED_ROCE_QP_STATE_INIT:
2380 		switch (new_state) {
2381 		case QED_ROCE_QP_STATE_RTR:
2382 			/* Update doorbell (in case post_recv was
2383 			 * done before move to RTR)
2384 			 */
2385 
2386 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2387 				writel(qp->rq.db_data.raw, qp->rq.db);
2388 			}
2389 			break;
2390 		case QED_ROCE_QP_STATE_ERR:
2391 			break;
2392 		default:
2393 			/* Invalid state change. */
2394 			status = -EINVAL;
2395 			break;
2396 		}
2397 		break;
2398 	case QED_ROCE_QP_STATE_RTR:
2399 		/* RTR->XXX */
2400 		switch (new_state) {
2401 		case QED_ROCE_QP_STATE_RTS:
2402 			break;
2403 		case QED_ROCE_QP_STATE_ERR:
2404 			break;
2405 		default:
2406 			/* Invalid state change. */
2407 			status = -EINVAL;
2408 			break;
2409 		}
2410 		break;
2411 	case QED_ROCE_QP_STATE_RTS:
2412 		/* RTS->XXX */
2413 		switch (new_state) {
2414 		case QED_ROCE_QP_STATE_SQD:
2415 			break;
2416 		case QED_ROCE_QP_STATE_ERR:
2417 			break;
2418 		default:
2419 			/* Invalid state change. */
2420 			status = -EINVAL;
2421 			break;
2422 		}
2423 		break;
2424 	case QED_ROCE_QP_STATE_SQD:
2425 		/* SQD->XXX */
2426 		switch (new_state) {
2427 		case QED_ROCE_QP_STATE_RTS:
2428 		case QED_ROCE_QP_STATE_ERR:
2429 			break;
2430 		default:
2431 			/* Invalid state change. */
2432 			status = -EINVAL;
2433 			break;
2434 		}
2435 		break;
2436 	case QED_ROCE_QP_STATE_ERR:
2437 		/* ERR->XXX */
2438 		switch (new_state) {
2439 		case QED_ROCE_QP_STATE_RESET:
2440 			if ((qp->rq.prod != qp->rq.cons) ||
2441 			    (qp->sq.prod != qp->sq.cons)) {
2442 				DP_NOTICE(dev,
2443 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2444 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2445 					  qp->sq.cons);
2446 				status = -EINVAL;
2447 			}
2448 			break;
2449 		default:
2450 			status = -EINVAL;
2451 			break;
2452 		}
2453 		break;
2454 	default:
2455 		status = -EINVAL;
2456 		break;
2457 	}
2458 
2459 	return status;
2460 }
2461 
2462 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2463 		   int attr_mask, struct ib_udata *udata)
2464 {
2465 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2466 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2467 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2468 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2469 	enum ib_qp_state old_qp_state, new_qp_state;
2470 	enum qed_roce_qp_state cur_state;
2471 	int rc = 0;
2472 
2473 	DP_DEBUG(dev, QEDR_MSG_QP,
2474 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2475 		 attr->qp_state);
2476 
2477 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
2478 		return -EOPNOTSUPP;
2479 
2480 	old_qp_state = qedr_get_ibqp_state(qp->state);
2481 	if (attr_mask & IB_QP_STATE)
2482 		new_qp_state = attr->qp_state;
2483 	else
2484 		new_qp_state = old_qp_state;
2485 
2486 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2487 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2488 					ibqp->qp_type, attr_mask)) {
2489 			DP_ERR(dev,
2490 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2491 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2492 			       attr_mask, qp->qp_id, ibqp->qp_type,
2493 			       old_qp_state, new_qp_state);
2494 			rc = -EINVAL;
2495 			goto err;
2496 		}
2497 	}
2498 
2499 	/* Translate the masks... */
2500 	if (attr_mask & IB_QP_STATE) {
2501 		SET_FIELD(qp_params.modify_flags,
2502 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2503 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2504 	}
2505 
2506 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2507 		qp_params.sqd_async = true;
2508 
2509 	if (attr_mask & IB_QP_PKEY_INDEX) {
2510 		SET_FIELD(qp_params.modify_flags,
2511 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2512 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2513 			rc = -EINVAL;
2514 			goto err;
2515 		}
2516 
2517 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2518 	}
2519 
2520 	if (attr_mask & IB_QP_QKEY)
2521 		qp->qkey = attr->qkey;
2522 
2523 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2524 		SET_FIELD(qp_params.modify_flags,
2525 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2526 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2527 						  IB_ACCESS_REMOTE_READ;
2528 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2529 						   IB_ACCESS_REMOTE_WRITE;
2530 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2531 					       IB_ACCESS_REMOTE_ATOMIC;
2532 	}
2533 
2534 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2535 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2536 			return -EINVAL;
2537 
2538 		if (attr_mask & IB_QP_PATH_MTU) {
2539 			if (attr->path_mtu < IB_MTU_256 ||
2540 			    attr->path_mtu > IB_MTU_4096) {
2541 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2542 				rc = -EINVAL;
2543 				goto err;
2544 			}
2545 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2546 				      ib_mtu_enum_to_int(iboe_get_mtu
2547 							 (dev->ndev->mtu)));
2548 		}
2549 
2550 		if (!qp->mtu) {
2551 			qp->mtu =
2552 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2553 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2554 		}
2555 
2556 		SET_FIELD(qp_params.modify_flags,
2557 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2558 
2559 		qp_params.traffic_class_tos = grh->traffic_class;
2560 		qp_params.flow_label = grh->flow_label;
2561 		qp_params.hop_limit_ttl = grh->hop_limit;
2562 
2563 		qp->sgid_idx = grh->sgid_index;
2564 
2565 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2566 		if (rc) {
2567 			DP_ERR(dev,
2568 			       "modify qp: problems with GID index %d (rc=%d)\n",
2569 			       grh->sgid_index, rc);
2570 			return rc;
2571 		}
2572 
2573 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2574 				   qp_params.remote_mac_addr);
2575 		if (rc)
2576 			return rc;
2577 
2578 		qp_params.use_local_mac = true;
2579 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2580 
2581 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2582 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2583 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2584 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2585 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2586 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2587 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2588 			 qp_params.remote_mac_addr);
2589 
2590 		qp_params.mtu = qp->mtu;
2591 		qp_params.lb_indication = false;
2592 	}
2593 
2594 	if (!qp_params.mtu) {
2595 		/* Stay with current MTU */
2596 		if (qp->mtu)
2597 			qp_params.mtu = qp->mtu;
2598 		else
2599 			qp_params.mtu =
2600 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2601 	}
2602 
2603 	if (attr_mask & IB_QP_TIMEOUT) {
2604 		SET_FIELD(qp_params.modify_flags,
2605 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2606 
2607 		/* The received timeout value is an exponent used like this:
2608 		 *    "12.7.34 LOCAL ACK TIMEOUT
2609 		 *    Value representing the transport (ACK) timeout for use by
2610 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2611 		 * The FW expects timeout in msec so we need to divide the usec
2612 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2613 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2614 		 * The value of zero means infinite so we use a 'max_t' to make
2615 		 * sure that sub 1 msec values will be configured as 1 msec.
2616 		 */
2617 		if (attr->timeout)
2618 			qp_params.ack_timeout =
2619 					1 << max_t(int, attr->timeout - 8, 0);
2620 		else
2621 			qp_params.ack_timeout = 0;
2622 	}
2623 
2624 	if (attr_mask & IB_QP_RETRY_CNT) {
2625 		SET_FIELD(qp_params.modify_flags,
2626 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2627 		qp_params.retry_cnt = attr->retry_cnt;
2628 	}
2629 
2630 	if (attr_mask & IB_QP_RNR_RETRY) {
2631 		SET_FIELD(qp_params.modify_flags,
2632 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2633 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2634 	}
2635 
2636 	if (attr_mask & IB_QP_RQ_PSN) {
2637 		SET_FIELD(qp_params.modify_flags,
2638 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2639 		qp_params.rq_psn = attr->rq_psn;
2640 		qp->rq_psn = attr->rq_psn;
2641 	}
2642 
2643 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2644 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2645 			rc = -EINVAL;
2646 			DP_ERR(dev,
2647 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2648 			       attr->max_rd_atomic,
2649 			       dev->attr.max_qp_req_rd_atomic_resc);
2650 			goto err;
2651 		}
2652 
2653 		SET_FIELD(qp_params.modify_flags,
2654 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2655 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2656 	}
2657 
2658 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2659 		SET_FIELD(qp_params.modify_flags,
2660 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2661 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2662 	}
2663 
2664 	if (attr_mask & IB_QP_SQ_PSN) {
2665 		SET_FIELD(qp_params.modify_flags,
2666 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2667 		qp_params.sq_psn = attr->sq_psn;
2668 		qp->sq_psn = attr->sq_psn;
2669 	}
2670 
2671 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2672 		if (attr->max_dest_rd_atomic >
2673 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2674 			DP_ERR(dev,
2675 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2676 			       attr->max_dest_rd_atomic,
2677 			       dev->attr.max_qp_resp_rd_atomic_resc);
2678 
2679 			rc = -EINVAL;
2680 			goto err;
2681 		}
2682 
2683 		SET_FIELD(qp_params.modify_flags,
2684 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2685 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2686 	}
2687 
2688 	if (attr_mask & IB_QP_DEST_QPN) {
2689 		SET_FIELD(qp_params.modify_flags,
2690 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2691 
2692 		qp_params.dest_qp = attr->dest_qp_num;
2693 		qp->dest_qp_num = attr->dest_qp_num;
2694 	}
2695 
2696 	cur_state = qp->state;
2697 
2698 	/* Update the QP state before the actual ramrod to prevent a race with
2699 	 * fast path. Modifying the QP state to error will cause the device to
2700 	 * flush the CQEs and while polling the flushed CQEs will considered as
2701 	 * a potential issue if the QP isn't in error state.
2702 	 */
2703 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2704 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2705 		qp->state = QED_ROCE_QP_STATE_ERR;
2706 
2707 	if (qp->qp_type != IB_QPT_GSI)
2708 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2709 					      qp->qed_qp, &qp_params);
2710 
2711 	if (attr_mask & IB_QP_STATE) {
2712 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2713 			rc = qedr_update_qp_state(dev, qp, cur_state,
2714 						  qp_params.new_state);
2715 		qp->state = qp_params.new_state;
2716 	}
2717 
2718 err:
2719 	return rc;
2720 }
2721 
2722 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2723 {
2724 	int ib_qp_acc_flags = 0;
2725 
2726 	if (params->incoming_rdma_write_en)
2727 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2728 	if (params->incoming_rdma_read_en)
2729 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2730 	if (params->incoming_atomic_en)
2731 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2732 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2733 	return ib_qp_acc_flags;
2734 }
2735 
2736 int qedr_query_qp(struct ib_qp *ibqp,
2737 		  struct ib_qp_attr *qp_attr,
2738 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2739 {
2740 	struct qed_rdma_query_qp_out_params params;
2741 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2742 	struct qedr_dev *dev = qp->dev;
2743 	int rc = 0;
2744 
2745 	memset(&params, 0, sizeof(params));
2746 
2747 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2748 	if (rc)
2749 		goto err;
2750 
2751 	memset(qp_attr, 0, sizeof(*qp_attr));
2752 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2753 
2754 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2755 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2756 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2757 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2758 	qp_attr->rq_psn = params.rq_psn;
2759 	qp_attr->sq_psn = params.sq_psn;
2760 	qp_attr->dest_qp_num = params.dest_qp;
2761 
2762 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2763 
2764 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2765 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2766 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2767 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2768 	qp_attr->cap.max_inline_data = dev->attr.max_inline;
2769 	qp_init_attr->cap = qp_attr->cap;
2770 
2771 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2772 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2773 			params.flow_label, qp->sgid_idx,
2774 			params.hop_limit_ttl, params.traffic_class_tos);
2775 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2776 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2777 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2778 	qp_attr->timeout = params.timeout;
2779 	qp_attr->rnr_retry = params.rnr_retry;
2780 	qp_attr->retry_cnt = params.retry_cnt;
2781 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2782 	qp_attr->pkey_index = params.pkey_index;
2783 	qp_attr->port_num = 1;
2784 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2785 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2786 	qp_attr->alt_pkey_index = 0;
2787 	qp_attr->alt_port_num = 0;
2788 	qp_attr->alt_timeout = 0;
2789 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2790 
2791 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2792 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2793 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2794 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2795 
2796 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2797 		 qp_attr->cap.max_inline_data);
2798 
2799 err:
2800 	return rc;
2801 }
2802 
2803 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2804 {
2805 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2806 	struct qedr_dev *dev = qp->dev;
2807 	struct ib_qp_attr attr;
2808 	int attr_mask = 0;
2809 
2810 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2811 		 qp, qp->qp_type);
2812 
2813 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2814 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2815 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2816 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2817 
2818 			attr.qp_state = IB_QPS_ERR;
2819 			attr_mask |= IB_QP_STATE;
2820 
2821 			/* Change the QP state to ERROR */
2822 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2823 		}
2824 	} else {
2825 		/* If connection establishment started the WAIT_FOR_CONNECT
2826 		 * bit will be on and we need to Wait for the establishment
2827 		 * to complete before destroying the qp.
2828 		 */
2829 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2830 				     &qp->iwarp_cm_flags))
2831 			wait_for_completion(&qp->iwarp_cm_comp);
2832 
2833 		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2834 		 * bit will be on, and we need to wait for the disconnect to
2835 		 * complete before continuing. We can use the same completion,
2836 		 * iwarp_cm_comp, since this is the only place that waits for
2837 		 * this completion and it is sequential. In addition,
2838 		 * disconnect can't occur before the connection is fully
2839 		 * established, therefore if WAIT_FOR_DISCONNECT is on it
2840 		 * means WAIT_FOR_CONNECT is also on and the completion for
2841 		 * CONNECT already occurred.
2842 		 */
2843 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2844 				     &qp->iwarp_cm_flags))
2845 			wait_for_completion(&qp->iwarp_cm_comp);
2846 	}
2847 
2848 	if (qp->qp_type == IB_QPT_GSI)
2849 		qedr_destroy_gsi_qp(dev);
2850 
2851 	/* We need to remove the entry from the xarray before we release the
2852 	 * qp_id to avoid a race of the qp_id being reallocated and failing
2853 	 * on xa_insert
2854 	 */
2855 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2856 		xa_erase(&dev->qps, qp->qp_id);
2857 
2858 	qedr_free_qp_resources(dev, qp, udata);
2859 
2860 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2861 		qedr_iw_qp_rem_ref(&qp->ibqp);
2862 
2863 	return 0;
2864 }
2865 
2866 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2867 		   struct ib_udata *udata)
2868 {
2869 	struct qedr_ah *ah = get_qedr_ah(ibah);
2870 
2871 	rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
2872 
2873 	return 0;
2874 }
2875 
2876 int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2877 {
2878 	struct qedr_ah *ah = get_qedr_ah(ibah);
2879 
2880 	rdma_destroy_ah_attr(&ah->attr);
2881 	return 0;
2882 }
2883 
2884 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2885 {
2886 	struct qedr_pbl *pbl, *tmp;
2887 
2888 	if (info->pbl_table)
2889 		list_add_tail(&info->pbl_table->list_entry,
2890 			      &info->free_pbl_list);
2891 
2892 	if (!list_empty(&info->inuse_pbl_list))
2893 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2894 
2895 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2896 		list_del(&pbl->list_entry);
2897 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2898 	}
2899 }
2900 
2901 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2902 			size_t page_list_len, bool two_layered)
2903 {
2904 	struct qedr_pbl *tmp;
2905 	int rc;
2906 
2907 	INIT_LIST_HEAD(&info->free_pbl_list);
2908 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2909 
2910 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2911 				  page_list_len, two_layered);
2912 	if (rc)
2913 		goto done;
2914 
2915 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2916 	if (IS_ERR(info->pbl_table)) {
2917 		rc = PTR_ERR(info->pbl_table);
2918 		goto done;
2919 	}
2920 
2921 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2922 		 &info->pbl_table->pa);
2923 
2924 	/* in usual case we use 2 PBLs, so we add one to free
2925 	 * list and allocating another one
2926 	 */
2927 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2928 	if (IS_ERR(tmp)) {
2929 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2930 		goto done;
2931 	}
2932 
2933 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2934 
2935 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2936 
2937 done:
2938 	if (rc)
2939 		free_mr_info(dev, info);
2940 
2941 	return rc;
2942 }
2943 
2944 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2945 			       u64 usr_addr, int acc, struct ib_udata *udata)
2946 {
2947 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2948 	struct qedr_mr *mr;
2949 	struct qedr_pd *pd;
2950 	int rc = -ENOMEM;
2951 
2952 	pd = get_qedr_pd(ibpd);
2953 	DP_DEBUG(dev, QEDR_MSG_MR,
2954 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2955 		 pd->pd_id, start, len, usr_addr, acc);
2956 
2957 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2958 		return ERR_PTR(-EINVAL);
2959 
2960 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2961 	if (!mr)
2962 		return ERR_PTR(rc);
2963 
2964 	mr->type = QEDR_MR_USER;
2965 
2966 	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2967 	if (IS_ERR(mr->umem)) {
2968 		rc = -EFAULT;
2969 		goto err0;
2970 	}
2971 
2972 	rc = init_mr_info(dev, &mr->info,
2973 			  ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1);
2974 	if (rc)
2975 		goto err1;
2976 
2977 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2978 			   &mr->info.pbl_info, PAGE_SHIFT);
2979 
2980 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2981 	if (rc) {
2982 		if (rc == -EINVAL)
2983 			DP_ERR(dev, "Out of MR resources\n");
2984 		else
2985 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
2986 
2987 		goto err1;
2988 	}
2989 
2990 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2991 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2992 	mr->hw_mr.key = 0;
2993 	mr->hw_mr.pd = pd->pd_id;
2994 	mr->hw_mr.local_read = 1;
2995 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2996 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2997 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2998 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2999 	mr->hw_mr.mw_bind = false;
3000 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
3001 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
3002 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
3003 	mr->hw_mr.page_size_log = PAGE_SHIFT;
3004 	mr->hw_mr.length = len;
3005 	mr->hw_mr.vaddr = usr_addr;
3006 	mr->hw_mr.phy_mr = false;
3007 	mr->hw_mr.dma_mr = false;
3008 
3009 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3010 	if (rc) {
3011 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3012 		goto err2;
3013 	}
3014 
3015 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3016 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3017 	    mr->hw_mr.remote_atomic)
3018 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3019 
3020 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
3021 		 mr->ibmr.lkey);
3022 	return &mr->ibmr;
3023 
3024 err2:
3025 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3026 err1:
3027 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
3028 err0:
3029 	kfree(mr);
3030 	return ERR_PTR(rc);
3031 }
3032 
3033 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
3034 {
3035 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
3036 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
3037 	int rc = 0;
3038 
3039 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
3040 	if (rc)
3041 		return rc;
3042 
3043 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3044 
3045 	if (mr->type != QEDR_MR_DMA)
3046 		free_mr_info(dev, &mr->info);
3047 
3048 	/* it could be user registered memory. */
3049 	ib_umem_release(mr->umem);
3050 
3051 	kfree(mr);
3052 
3053 	return rc;
3054 }
3055 
3056 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
3057 				       int max_page_list_len)
3058 {
3059 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3060 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3061 	struct qedr_mr *mr;
3062 	int rc = -ENOMEM;
3063 
3064 	DP_DEBUG(dev, QEDR_MSG_MR,
3065 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
3066 		 max_page_list_len);
3067 
3068 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3069 	if (!mr)
3070 		return ERR_PTR(rc);
3071 
3072 	mr->dev = dev;
3073 	mr->type = QEDR_MR_FRMR;
3074 
3075 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
3076 	if (rc)
3077 		goto err0;
3078 
3079 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3080 	if (rc) {
3081 		if (rc == -EINVAL)
3082 			DP_ERR(dev, "Out of MR resources\n");
3083 		else
3084 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
3085 
3086 		goto err0;
3087 	}
3088 
3089 	/* Index only, 18 bit long, lkey = itid << 8 | key */
3090 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
3091 	mr->hw_mr.key = 0;
3092 	mr->hw_mr.pd = pd->pd_id;
3093 	mr->hw_mr.local_read = 1;
3094 	mr->hw_mr.local_write = 0;
3095 	mr->hw_mr.remote_read = 0;
3096 	mr->hw_mr.remote_write = 0;
3097 	mr->hw_mr.remote_atomic = 0;
3098 	mr->hw_mr.mw_bind = false;
3099 	mr->hw_mr.pbl_ptr = 0;
3100 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
3101 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
3102 	mr->hw_mr.length = 0;
3103 	mr->hw_mr.vaddr = 0;
3104 	mr->hw_mr.phy_mr = true;
3105 	mr->hw_mr.dma_mr = false;
3106 
3107 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3108 	if (rc) {
3109 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3110 		goto err1;
3111 	}
3112 
3113 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3114 	mr->ibmr.rkey = mr->ibmr.lkey;
3115 
3116 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
3117 	return mr;
3118 
3119 err1:
3120 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3121 err0:
3122 	kfree(mr);
3123 	return ERR_PTR(rc);
3124 }
3125 
3126 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3127 			    u32 max_num_sg)
3128 {
3129 	struct qedr_mr *mr;
3130 
3131 	if (mr_type != IB_MR_TYPE_MEM_REG)
3132 		return ERR_PTR(-EINVAL);
3133 
3134 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
3135 
3136 	if (IS_ERR(mr))
3137 		return ERR_PTR(-EINVAL);
3138 
3139 	return &mr->ibmr;
3140 }
3141 
3142 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3143 {
3144 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3145 	struct qedr_pbl *pbl_table;
3146 	struct regpair *pbe;
3147 	u32 pbes_in_page;
3148 
3149 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3150 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
3151 		return -ENOMEM;
3152 	}
3153 
3154 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3155 		 mr->npages, addr);
3156 
3157 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3158 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3159 	pbe = (struct regpair *)pbl_table->va;
3160 	pbe +=  mr->npages % pbes_in_page;
3161 	pbe->lo = cpu_to_le32((u32)addr);
3162 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3163 
3164 	mr->npages++;
3165 
3166 	return 0;
3167 }
3168 
3169 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3170 {
3171 	int work = info->completed - info->completed_handled - 1;
3172 
3173 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3174 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3175 		struct qedr_pbl *pbl;
3176 
3177 		/* Free all the page list that are possible to be freed
3178 		 * (all the ones that were invalidated), under the assumption
3179 		 * that if an FMR was completed successfully that means that
3180 		 * if there was an invalidate operation before it also ended
3181 		 */
3182 		pbl = list_first_entry(&info->inuse_pbl_list,
3183 				       struct qedr_pbl, list_entry);
3184 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3185 		info->completed_handled++;
3186 	}
3187 }
3188 
3189 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3190 		   int sg_nents, unsigned int *sg_offset)
3191 {
3192 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3193 
3194 	mr->npages = 0;
3195 
3196 	handle_completed_mrs(mr->dev, &mr->info);
3197 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3198 }
3199 
3200 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3201 {
3202 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3203 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3204 	struct qedr_mr *mr;
3205 	int rc;
3206 
3207 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3208 	if (!mr)
3209 		return ERR_PTR(-ENOMEM);
3210 
3211 	mr->type = QEDR_MR_DMA;
3212 
3213 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3214 	if (rc) {
3215 		if (rc == -EINVAL)
3216 			DP_ERR(dev, "Out of MR resources\n");
3217 		else
3218 			DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
3219 
3220 		goto err1;
3221 	}
3222 
3223 	/* index only, 18 bit long, lkey = itid << 8 | key */
3224 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3225 	mr->hw_mr.pd = pd->pd_id;
3226 	mr->hw_mr.local_read = 1;
3227 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3228 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3229 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3230 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3231 	mr->hw_mr.dma_mr = true;
3232 
3233 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3234 	if (rc) {
3235 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3236 		goto err2;
3237 	}
3238 
3239 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3240 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3241 	    mr->hw_mr.remote_atomic)
3242 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3243 
3244 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3245 	return &mr->ibmr;
3246 
3247 err2:
3248 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3249 err1:
3250 	kfree(mr);
3251 	return ERR_PTR(rc);
3252 }
3253 
3254 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3255 {
3256 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3257 }
3258 
3259 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3260 {
3261 	int i, len = 0;
3262 
3263 	for (i = 0; i < num_sge; i++)
3264 		len += sg_list[i].length;
3265 
3266 	return len;
3267 }
3268 
3269 static void swap_wqe_data64(u64 *p)
3270 {
3271 	int i;
3272 
3273 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3274 		*p = cpu_to_be64(cpu_to_le64(*p));
3275 }
3276 
3277 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3278 				       struct qedr_qp *qp, u8 *wqe_size,
3279 				       const struct ib_send_wr *wr,
3280 				       const struct ib_send_wr **bad_wr,
3281 				       u8 *bits, u8 bit)
3282 {
3283 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3284 	char *seg_prt, *wqe;
3285 	int i, seg_siz;
3286 
3287 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3288 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3289 		*bad_wr = wr;
3290 		return 0;
3291 	}
3292 
3293 	if (!data_size)
3294 		return data_size;
3295 
3296 	*bits |= bit;
3297 
3298 	seg_prt = NULL;
3299 	wqe = NULL;
3300 	seg_siz = 0;
3301 
3302 	/* Copy data inline */
3303 	for (i = 0; i < wr->num_sge; i++) {
3304 		u32 len = wr->sg_list[i].length;
3305 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3306 
3307 		while (len > 0) {
3308 			u32 cur;
3309 
3310 			/* New segment required */
3311 			if (!seg_siz) {
3312 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3313 				seg_prt = wqe;
3314 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3315 				(*wqe_size)++;
3316 			}
3317 
3318 			/* Calculate currently allowed length */
3319 			cur = min_t(u32, len, seg_siz);
3320 			memcpy(seg_prt, src, cur);
3321 
3322 			/* Update segment variables */
3323 			seg_prt += cur;
3324 			seg_siz -= cur;
3325 
3326 			/* Update sge variables */
3327 			src += cur;
3328 			len -= cur;
3329 
3330 			/* Swap fully-completed segments */
3331 			if (!seg_siz)
3332 				swap_wqe_data64((u64 *)wqe);
3333 		}
3334 	}
3335 
3336 	/* swap last not completed segment */
3337 	if (seg_siz)
3338 		swap_wqe_data64((u64 *)wqe);
3339 
3340 	return data_size;
3341 }
3342 
/* Fill a receive queue SGE: address through DMA_REGPAIR_LE(), length and
 * flags converted with cpu_to_le32(). Every use of @sge is parenthesized so
 * that any pointer expression may be passed.
 */
#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->flags = cpu_to_le32(vflags);		\
	} while (0)
3349 
/* Fill an SRQ WQE header: work request id through DMA_REGPAIR_LE() and the
 * number of SGEs that follow. Arguments are parenthesized so that arbitrary
 * expressions may be passed.
 */
#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
	do {							\
		DMA_REGPAIR_LE((hdr)->wr_id, vwr_id);		\
		(hdr)->num_sges = (num_sge);			\
	} while (0)
3355 
/* Fill an SRQ SGE: address through DMA_REGPAIR_LE(), length and lkey
 * converted with cpu_to_le32(). Every use of @sge is parenthesized so that
 * any pointer expression may be passed.
 */
#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->l_key = cpu_to_le32(vlkey);		\
	} while (0)
3362 
3363 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3364 				const struct ib_send_wr *wr)
3365 {
3366 	u32 data_size = 0;
3367 	int i;
3368 
3369 	for (i = 0; i < wr->num_sge; i++) {
3370 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3371 
3372 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3373 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3374 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3375 		data_size += wr->sg_list[i].length;
3376 	}
3377 
3378 	if (wqe_size)
3379 		*wqe_size += wr->num_sge;
3380 
3381 	return data_size;
3382 }
3383 
3384 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3385 				     struct qedr_qp *qp,
3386 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3387 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3388 				     const struct ib_send_wr *wr,
3389 				     const struct ib_send_wr **bad_wr)
3390 {
3391 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3392 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3393 
3394 	if (wr->send_flags & IB_SEND_INLINE &&
3395 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3396 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3397 		u8 flags = 0;
3398 
3399 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3400 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3401 						   bad_wr, &rwqe->flags, flags);
3402 	}
3403 
3404 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3405 }
3406 
3407 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3408 				     struct qedr_qp *qp,
3409 				     struct rdma_sq_send_wqe_1st *swqe,
3410 				     struct rdma_sq_send_wqe_2st *swqe2,
3411 				     const struct ib_send_wr *wr,
3412 				     const struct ib_send_wr **bad_wr)
3413 {
3414 	memset(swqe2, 0, sizeof(*swqe2));
3415 	if (wr->send_flags & IB_SEND_INLINE) {
3416 		u8 flags = 0;
3417 
3418 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3419 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3420 						   bad_wr, &swqe->flags, flags);
3421 	}
3422 
3423 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3424 }
3425 
3426 static int qedr_prepare_reg(struct qedr_qp *qp,
3427 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3428 			    const struct ib_reg_wr *wr)
3429 {
3430 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3431 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3432 
3433 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3434 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3435 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3436 	fwqe1->l_key = wr->key;
3437 
3438 	fwqe2->access_ctrl = 0;
3439 
3440 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3441 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3442 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3443 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3444 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3445 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3446 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3447 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3448 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3449 	fwqe2->fmr_ctrl = 0;
3450 
3451 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3452 		   ilog2(mr->ibmr.page_size) - 12);
3453 
3454 	fwqe2->length_hi = 0;
3455 	fwqe2->length_lo = mr->ibmr.length;
3456 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3457 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3458 
3459 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3460 
3461 	return 0;
3462 }
3463 
3464 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3465 {
3466 	switch (opcode) {
3467 	case IB_WR_RDMA_WRITE:
3468 	case IB_WR_RDMA_WRITE_WITH_IMM:
3469 		return IB_WC_RDMA_WRITE;
3470 	case IB_WR_SEND_WITH_IMM:
3471 	case IB_WR_SEND:
3472 	case IB_WR_SEND_WITH_INV:
3473 		return IB_WC_SEND;
3474 	case IB_WR_RDMA_READ:
3475 	case IB_WR_RDMA_READ_WITH_INV:
3476 		return IB_WC_RDMA_READ;
3477 	case IB_WR_ATOMIC_CMP_AND_SWP:
3478 		return IB_WC_COMP_SWAP;
3479 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3480 		return IB_WC_FETCH_ADD;
3481 	case IB_WR_REG_MR:
3482 		return IB_WC_REG_MR;
3483 	case IB_WR_LOCAL_INV:
3484 		return IB_WC_LOCAL_INV;
3485 	default:
3486 		return IB_WC_SEND;
3487 	}
3488 }
3489 
3490 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3491 				      const struct ib_send_wr *wr)
3492 {
3493 	int wq_is_full, err_wr, pbl_is_full;
3494 	struct qedr_dev *dev = qp->dev;
3495 
3496 	/* prevent SQ overflow and/or processing of a bad WR */
3497 	err_wr = wr->num_sge > qp->sq.max_sges;
3498 	wq_is_full = qedr_wq_is_full(&qp->sq);
3499 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3500 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3501 	if (wq_is_full || err_wr || pbl_is_full) {
3502 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3503 			DP_ERR(dev,
3504 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3505 			       qp);
3506 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3507 		}
3508 
3509 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3510 			DP_ERR(dev,
3511 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3512 			       qp);
3513 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3514 		}
3515 
3516 		if (pbl_is_full &&
3517 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3518 			DP_ERR(dev,
3519 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3520 			       qp);
3521 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3522 		}
3523 		return false;
3524 	}
3525 	return true;
3526 }
3527 
3528 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3529 			    const struct ib_send_wr **bad_wr)
3530 {
3531 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3532 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3533 	struct rdma_sq_atomic_wqe_1st *awqe1;
3534 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3535 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3536 	struct rdma_sq_send_wqe_2st *swqe2;
3537 	struct rdma_sq_local_inv_wqe *iwqe;
3538 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3539 	struct rdma_sq_send_wqe_1st *swqe;
3540 	struct rdma_sq_rdma_wqe_1st *rwqe;
3541 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3542 	struct rdma_sq_common_wqe *wqe;
3543 	u32 length;
3544 	int rc = 0;
3545 	bool comp;
3546 
3547 	if (!qedr_can_post_send(qp, wr)) {
3548 		*bad_wr = wr;
3549 		return -ENOMEM;
3550 	}
3551 
3552 	wqe = qed_chain_produce(&qp->sq.pbl);
3553 	qp->wqe_wr_id[qp->sq.prod].signaled =
3554 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3555 
3556 	wqe->flags = 0;
3557 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3558 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3559 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3560 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3561 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3562 		   !!(wr->send_flags & IB_SEND_FENCE));
3563 	wqe->prev_wqe_size = qp->prev_wqe_size;
3564 
3565 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3566 
3567 	switch (wr->opcode) {
3568 	case IB_WR_SEND_WITH_IMM:
3569 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3570 			rc = -EINVAL;
3571 			*bad_wr = wr;
3572 			break;
3573 		}
3574 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3575 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3576 		swqe->wqe_size = 2;
3577 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3578 
3579 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3580 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3581 						   wr, bad_wr);
3582 		swqe->length = cpu_to_le32(length);
3583 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3584 		qp->prev_wqe_size = swqe->wqe_size;
3585 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3586 		break;
3587 	case IB_WR_SEND:
3588 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3589 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3590 
3591 		swqe->wqe_size = 2;
3592 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3593 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3594 						   wr, bad_wr);
3595 		swqe->length = cpu_to_le32(length);
3596 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3597 		qp->prev_wqe_size = swqe->wqe_size;
3598 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3599 		break;
3600 	case IB_WR_SEND_WITH_INV:
3601 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3602 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3603 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3604 		swqe->wqe_size = 2;
3605 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3606 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3607 						   wr, bad_wr);
3608 		swqe->length = cpu_to_le32(length);
3609 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3610 		qp->prev_wqe_size = swqe->wqe_size;
3611 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3612 		break;
3613 
3614 	case IB_WR_RDMA_WRITE_WITH_IMM:
3615 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3616 			rc = -EINVAL;
3617 			*bad_wr = wr;
3618 			break;
3619 		}
3620 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3621 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3622 
3623 		rwqe->wqe_size = 2;
3624 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3625 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3626 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3627 						   wr, bad_wr);
3628 		rwqe->length = cpu_to_le32(length);
3629 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3630 		qp->prev_wqe_size = rwqe->wqe_size;
3631 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3632 		break;
3633 	case IB_WR_RDMA_WRITE:
3634 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3635 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3636 
3637 		rwqe->wqe_size = 2;
3638 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3639 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3640 						   wr, bad_wr);
3641 		rwqe->length = cpu_to_le32(length);
3642 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3643 		qp->prev_wqe_size = rwqe->wqe_size;
3644 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3645 		break;
3646 	case IB_WR_RDMA_READ_WITH_INV:
3647 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3648 		fallthrough;	/* same is identical to RDMA READ */
3649 
3650 	case IB_WR_RDMA_READ:
3651 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3652 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3653 
3654 		rwqe->wqe_size = 2;
3655 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3656 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3657 						   wr, bad_wr);
3658 		rwqe->length = cpu_to_le32(length);
3659 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3660 		qp->prev_wqe_size = rwqe->wqe_size;
3661 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3662 		break;
3663 
3664 	case IB_WR_ATOMIC_CMP_AND_SWP:
3665 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3666 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3667 		awqe1->wqe_size = 4;
3668 
3669 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3670 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3671 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3672 
3673 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3674 
3675 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3676 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3677 			DMA_REGPAIR_LE(awqe3->swap_data,
3678 				       atomic_wr(wr)->compare_add);
3679 		} else {
3680 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3681 			DMA_REGPAIR_LE(awqe3->swap_data,
3682 				       atomic_wr(wr)->swap);
3683 			DMA_REGPAIR_LE(awqe3->cmp_data,
3684 				       atomic_wr(wr)->compare_add);
3685 		}
3686 
3687 		qedr_prepare_sq_sges(qp, NULL, wr);
3688 
3689 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3690 		qp->prev_wqe_size = awqe1->wqe_size;
3691 		break;
3692 
3693 	case IB_WR_LOCAL_INV:
3694 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3695 		iwqe->wqe_size = 1;
3696 
3697 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3698 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3699 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3700 		qp->prev_wqe_size = iwqe->wqe_size;
3701 		break;
3702 	case IB_WR_REG_MR:
3703 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3704 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3705 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3706 		fwqe1->wqe_size = 2;
3707 
3708 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3709 		if (rc) {
3710 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3711 			*bad_wr = wr;
3712 			break;
3713 		}
3714 
3715 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3716 		qp->prev_wqe_size = fwqe1->wqe_size;
3717 		break;
3718 	default:
3719 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3720 		rc = -EINVAL;
3721 		*bad_wr = wr;
3722 		break;
3723 	}
3724 
3725 	if (*bad_wr) {
3726 		u16 value;
3727 
3728 		/* Restore prod to its position before
3729 		 * this WR was processed
3730 		 */
3731 		value = le16_to_cpu(qp->sq.db_data.data.value);
3732 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3733 
3734 		/* Restore prev_wqe_size */
3735 		qp->prev_wqe_size = wqe->prev_wqe_size;
3736 		rc = -EINVAL;
3737 		DP_ERR(dev, "POST SEND FAILED\n");
3738 	}
3739 
3740 	return rc;
3741 }
3742 
3743 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3744 		   const struct ib_send_wr **bad_wr)
3745 {
3746 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3747 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3748 	unsigned long flags;
3749 	int rc = 0;
3750 
3751 	*bad_wr = NULL;
3752 
3753 	if (qp->qp_type == IB_QPT_GSI)
3754 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3755 
3756 	spin_lock_irqsave(&qp->q_lock, flags);
3757 
3758 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3759 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3760 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3761 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3762 			spin_unlock_irqrestore(&qp->q_lock, flags);
3763 			*bad_wr = wr;
3764 			DP_DEBUG(dev, QEDR_MSG_CQ,
3765 				 "QP in wrong state! QP icid=0x%x state %d\n",
3766 				 qp->icid, qp->state);
3767 			return -EINVAL;
3768 		}
3769 	}
3770 
3771 	while (wr) {
3772 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3773 		if (rc)
3774 			break;
3775 
3776 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3777 
3778 		qedr_inc_sw_prod(&qp->sq);
3779 
3780 		qp->sq.db_data.data.value++;
3781 
3782 		wr = wr->next;
3783 	}
3784 
3785 	/* Trigger doorbell
3786 	 * If there was a failure in the first WR then it will be triggered in
3787 	 * vane. However this is not harmful (as long as the producer value is
3788 	 * unchanged). For performance reasons we avoid checking for this
3789 	 * redundant doorbell.
3790 	 *
3791 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3792 	 * soon as we give the doorbell, we could get a completion
3793 	 * for this wr, therefore we need to make sure that the
3794 	 * memory is updated before giving the doorbell.
3795 	 * During qedr_poll_cq, rmb is called before accessing the
3796 	 * cqe. This covers for the smp_rmb as well.
3797 	 */
3798 	smp_wmb();
3799 	writel(qp->sq.db_data.raw, qp->sq.db);
3800 
3801 	spin_unlock_irqrestore(&qp->q_lock, flags);
3802 
3803 	return rc;
3804 }
3805 
3806 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3807 {
3808 	u32 used;
3809 
3810 	/* Calculate number of elements used based on producer
3811 	 * count and consumer count and subtract it from max
3812 	 * work request supported so that we get elements left.
3813 	 */
3814 	used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
3815 
3816 	return hw_srq->max_wr - used;
3817 }
3818 
3819 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3820 		       const struct ib_recv_wr **bad_wr)
3821 {
3822 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3823 	struct qedr_srq_hwq_info *hw_srq;
3824 	struct qedr_dev *dev = srq->dev;
3825 	struct qed_chain *pbl;
3826 	unsigned long flags;
3827 	int status = 0;
3828 	u32 num_sge;
3829 
3830 	spin_lock_irqsave(&srq->lock, flags);
3831 
3832 	hw_srq = &srq->hw_srq;
3833 	pbl = &srq->hw_srq.pbl;
3834 	while (wr) {
3835 		struct rdma_srq_wqe_header *hdr;
3836 		int i;
3837 
3838 		if (!qedr_srq_elem_left(hw_srq) ||
3839 		    wr->num_sge > srq->hw_srq.max_sges) {
3840 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3841 			       hw_srq->wr_prod_cnt,
3842 			       atomic_read(&hw_srq->wr_cons_cnt),
3843 			       wr->num_sge, srq->hw_srq.max_sges);
3844 			status = -ENOMEM;
3845 			*bad_wr = wr;
3846 			break;
3847 		}
3848 
3849 		hdr = qed_chain_produce(pbl);
3850 		num_sge = wr->num_sge;
3851 		/* Set number of sge and work request id in header */
3852 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3853 
3854 		srq->hw_srq.wr_prod_cnt++;
3855 		hw_srq->wqe_prod++;
3856 		hw_srq->sge_prod++;
3857 
3858 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3859 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3860 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3861 
3862 		for (i = 0; i < wr->num_sge; i++) {
3863 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3864 
3865 			/* Set SGE length, lkey and address */
3866 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3867 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3868 
3869 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3870 				 "[%d]: len %d key %x addr %x:%x\n",
3871 				 i, srq_sge->length, srq_sge->l_key,
3872 				 srq_sge->addr.hi, srq_sge->addr.lo);
3873 			hw_srq->sge_prod++;
3874 		}
3875 
3876 		/* Update WQE and SGE information before
3877 		 * updating producer.
3878 		 */
3879 		dma_wmb();
3880 
3881 		/* SRQ producer is 8 bytes. Need to update SGE producer index
3882 		 * in first 4 bytes and need to update WQE producer in
3883 		 * next 4 bytes.
3884 		 */
3885 		srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod);
3886 		/* Make sure sge producer is updated first */
3887 		dma_wmb();
3888 		srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod);
3889 
3890 		wr = wr->next;
3891 	}
3892 
3893 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3894 		 qed_chain_get_elem_left(pbl));
3895 	spin_unlock_irqrestore(&srq->lock, flags);
3896 
3897 	return status;
3898 }
3899 
3900 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3901 		   const struct ib_recv_wr **bad_wr)
3902 {
3903 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3904 	struct qedr_dev *dev = qp->dev;
3905 	unsigned long flags;
3906 	int status = 0;
3907 
3908 	if (qp->qp_type == IB_QPT_GSI)
3909 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3910 
3911 	spin_lock_irqsave(&qp->q_lock, flags);
3912 
3913 	while (wr) {
3914 		int i;
3915 
3916 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3917 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3918 		    wr->num_sge > qp->rq.max_sges) {
3919 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3920 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3921 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3922 			       qp->rq.max_sges);
3923 			status = -ENOMEM;
3924 			*bad_wr = wr;
3925 			break;
3926 		}
3927 		for (i = 0; i < wr->num_sge; i++) {
3928 			u32 flags = 0;
3929 			struct rdma_rq_sge *rqe =
3930 			    qed_chain_produce(&qp->rq.pbl);
3931 
3932 			/* First one must include the number
3933 			 * of SGE in the list
3934 			 */
3935 			if (!i)
3936 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3937 					  wr->num_sge);
3938 
3939 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3940 				  wr->sg_list[i].lkey);
3941 
3942 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3943 				   wr->sg_list[i].length, flags);
3944 		}
3945 
3946 		/* Special case of no sges. FW requires between 1-4 sges...
3947 		 * in this case we need to post 1 sge with length zero. this is
3948 		 * because rdma write with immediate consumes an RQ.
3949 		 */
3950 		if (!wr->num_sge) {
3951 			u32 flags = 0;
3952 			struct rdma_rq_sge *rqe =
3953 			    qed_chain_produce(&qp->rq.pbl);
3954 
3955 			/* First one must include the number
3956 			 * of SGE in the list
3957 			 */
3958 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3959 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3960 
3961 			RQ_SGE_SET(rqe, 0, 0, flags);
3962 			i = 1;
3963 		}
3964 
3965 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3966 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3967 
3968 		qedr_inc_sw_prod(&qp->rq);
3969 
3970 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3971 		 * soon as we give the doorbell, we could get a completion
3972 		 * for this wr, therefore we need to make sure that the
3973 		 * memory is update before giving the doorbell.
3974 		 * During qedr_poll_cq, rmb is called before accessing the
3975 		 * cqe. This covers for the smp_rmb as well.
3976 		 */
3977 		smp_wmb();
3978 
3979 		qp->rq.db_data.data.value++;
3980 
3981 		writel(qp->rq.db_data.raw, qp->rq.db);
3982 
3983 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3984 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3985 		}
3986 
3987 		wr = wr->next;
3988 	}
3989 
3990 	spin_unlock_irqrestore(&qp->q_lock, flags);
3991 
3992 	return status;
3993 }
3994 
3995 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3996 {
3997 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3998 
3999 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
4000 		cq->pbl_toggle;
4001 }
4002 
4003 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
4004 {
4005 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
4006 	struct qedr_qp *qp;
4007 
4008 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
4009 						   resp_cqe->qp_handle.lo,
4010 						   u64);
4011 	return qp;
4012 }
4013 
4014 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
4015 {
4016 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
4017 
4018 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
4019 }
4020 
4021 /* Return latest CQE (needs processing) */
4022 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
4023 {
4024 	return cq->latest_cqe;
4025 }
4026 
4027 /* In fmr we need to increase the number of fmr completed counter for the fmr
4028  * algorithm determining whether we can free a pbl or not.
4029  * we need to perform this whether the work request was signaled or not. for
4030  * this purpose we call this function from the condition that checks if a wr
4031  * should be skipped, to make sure we don't miss it ( possibly this fmr
4032  * operation was not signalted)
4033  */
4034 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
4035 {
4036 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
4037 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
4038 }
4039 
4040 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
4041 		       struct qedr_cq *cq, int num_entries,
4042 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
4043 		       int force)
4044 {
4045 	u16 cnt = 0;
4046 
4047 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
4048 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
4049 			qedr_chk_if_fmr(qp);
4050 			/* skip WC */
4051 			goto next_cqe;
4052 		}
4053 
4054 		/* fill WC */
4055 		wc->status = status;
4056 		wc->vendor_err = 0;
4057 		wc->wc_flags = 0;
4058 		wc->src_qp = qp->id;
4059 		wc->qp = &qp->ibqp;
4060 
4061 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
4062 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
4063 
4064 		switch (wc->opcode) {
4065 		case IB_WC_RDMA_WRITE:
4066 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
4067 			break;
4068 		case IB_WC_COMP_SWAP:
4069 		case IB_WC_FETCH_ADD:
4070 			wc->byte_len = 8;
4071 			break;
4072 		case IB_WC_REG_MR:
4073 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
4074 			break;
4075 		case IB_WC_RDMA_READ:
4076 		case IB_WC_SEND:
4077 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
4078 			break;
4079 		default:
4080 			break;
4081 		}
4082 
4083 		num_entries--;
4084 		wc++;
4085 		cnt++;
4086 next_cqe:
4087 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
4088 			qed_chain_consume(&qp->sq.pbl);
4089 		qedr_inc_sw_cons(&qp->sq);
4090 	}
4091 
4092 	return cnt;
4093 }
4094 
4095 static int qedr_poll_cq_req(struct qedr_dev *dev,
4096 			    struct qedr_qp *qp, struct qedr_cq *cq,
4097 			    int num_entries, struct ib_wc *wc,
4098 			    struct rdma_cqe_requester *req)
4099 {
4100 	int cnt = 0;
4101 
4102 	switch (req->status) {
4103 	case RDMA_CQE_REQ_STS_OK:
4104 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4105 				  IB_WC_SUCCESS, 0);
4106 		break;
4107 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
4108 		if (qp->state != QED_ROCE_QP_STATE_ERR)
4109 			DP_DEBUG(dev, QEDR_MSG_CQ,
4110 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4111 				 cq->icid, qp->icid);
4112 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4113 				  IB_WC_WR_FLUSH_ERR, 1);
4114 		break;
4115 	default:
4116 		/* process all WQE before the cosumer */
4117 		qp->state = QED_ROCE_QP_STATE_ERR;
4118 		cnt = process_req(dev, qp, cq, num_entries, wc,
4119 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
4120 		wc += cnt;
4121 		/* if we have extra WC fill it with actual error info */
4122 		if (cnt < num_entries) {
4123 			enum ib_wc_status wc_status;
4124 
4125 			switch (req->status) {
4126 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4127 				DP_ERR(dev,
4128 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4129 				       cq->icid, qp->icid);
4130 				wc_status = IB_WC_BAD_RESP_ERR;
4131 				break;
4132 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4133 				DP_ERR(dev,
4134 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4135 				       cq->icid, qp->icid);
4136 				wc_status = IB_WC_LOC_LEN_ERR;
4137 				break;
4138 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4139 				DP_ERR(dev,
4140 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4141 				       cq->icid, qp->icid);
4142 				wc_status = IB_WC_LOC_QP_OP_ERR;
4143 				break;
4144 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4145 				DP_ERR(dev,
4146 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4147 				       cq->icid, qp->icid);
4148 				wc_status = IB_WC_LOC_PROT_ERR;
4149 				break;
4150 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4151 				DP_ERR(dev,
4152 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4153 				       cq->icid, qp->icid);
4154 				wc_status = IB_WC_MW_BIND_ERR;
4155 				break;
4156 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4157 				DP_ERR(dev,
4158 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4159 				       cq->icid, qp->icid);
4160 				wc_status = IB_WC_REM_INV_REQ_ERR;
4161 				break;
4162 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4163 				DP_ERR(dev,
4164 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4165 				       cq->icid, qp->icid);
4166 				wc_status = IB_WC_REM_ACCESS_ERR;
4167 				break;
4168 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4169 				DP_ERR(dev,
4170 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4171 				       cq->icid, qp->icid);
4172 				wc_status = IB_WC_REM_OP_ERR;
4173 				break;
4174 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4175 				DP_ERR(dev,
4176 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4177 				       cq->icid, qp->icid);
4178 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4179 				break;
4180 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4181 				DP_ERR(dev,
4182 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4183 				       cq->icid, qp->icid);
4184 				wc_status = IB_WC_RETRY_EXC_ERR;
4185 				break;
4186 			default:
4187 				DP_ERR(dev,
4188 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4189 				       cq->icid, qp->icid);
4190 				wc_status = IB_WC_GENERAL_ERR;
4191 			}
4192 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4193 					   wc_status, 1);
4194 		}
4195 	}
4196 
4197 	return cnt;
4198 }
4199 
4200 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4201 {
4202 	switch (status) {
4203 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4204 		return IB_WC_LOC_ACCESS_ERR;
4205 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4206 		return IB_WC_LOC_LEN_ERR;
4207 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4208 		return IB_WC_LOC_QP_OP_ERR;
4209 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4210 		return IB_WC_LOC_PROT_ERR;
4211 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4212 		return IB_WC_MW_BIND_ERR;
4213 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4214 		return IB_WC_REM_INV_RD_REQ_ERR;
4215 	case RDMA_CQE_RESP_STS_OK:
4216 		return IB_WC_SUCCESS;
4217 	default:
4218 		return IB_WC_GENERAL_ERR;
4219 	}
4220 }
4221 
4222 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4223 					  struct ib_wc *wc)
4224 {
4225 	wc->status = IB_WC_SUCCESS;
4226 	wc->byte_len = le32_to_cpu(resp->length);
4227 
4228 	if (resp->flags & QEDR_RESP_IMM) {
4229 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4230 		wc->wc_flags |= IB_WC_WITH_IMM;
4231 
4232 		if (resp->flags & QEDR_RESP_RDMA)
4233 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4234 
4235 		if (resp->flags & QEDR_RESP_INV)
4236 			return -EINVAL;
4237 
4238 	} else if (resp->flags & QEDR_RESP_INV) {
4239 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4240 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4241 
4242 		if (resp->flags & QEDR_RESP_RDMA)
4243 			return -EINVAL;
4244 
4245 	} else if (resp->flags & QEDR_RESP_RDMA) {
4246 		return -EINVAL;
4247 	}
4248 
4249 	return 0;
4250 }
4251 
4252 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4253 			       struct qedr_cq *cq, struct ib_wc *wc,
4254 			       struct rdma_cqe_responder *resp, u64 wr_id)
4255 {
4256 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4257 	wc->opcode = IB_WC_RECV;
4258 	wc->wc_flags = 0;
4259 
4260 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4261 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4262 			DP_ERR(dev,
4263 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4264 			       cq, cq->icid, resp->flags);
4265 
4266 	} else {
4267 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4268 		if (wc->status == IB_WC_GENERAL_ERR)
4269 			DP_ERR(dev,
4270 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4271 			       cq, cq->icid, resp->status);
4272 	}
4273 
4274 	/* Fill the rest of the WC */
4275 	wc->vendor_err = 0;
4276 	wc->src_qp = qp->id;
4277 	wc->qp = &qp->ibqp;
4278 	wc->wr_id = wr_id;
4279 }
4280 
4281 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4282 				struct qedr_cq *cq, struct ib_wc *wc,
4283 				struct rdma_cqe_responder *resp)
4284 {
4285 	struct qedr_srq *srq = qp->srq;
4286 	u64 wr_id;
4287 
4288 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4289 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4290 
4291 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4292 		wc->status = IB_WC_WR_FLUSH_ERR;
4293 		wc->vendor_err = 0;
4294 		wc->wr_id = wr_id;
4295 		wc->byte_len = 0;
4296 		wc->src_qp = qp->id;
4297 		wc->qp = &qp->ibqp;
4298 		wc->wr_id = wr_id;
4299 	} else {
4300 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4301 	}
4302 	atomic_inc(&srq->hw_srq.wr_cons_cnt);
4303 
4304 	return 1;
4305 }
4306 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4307 			    struct qedr_cq *cq, struct ib_wc *wc,
4308 			    struct rdma_cqe_responder *resp)
4309 {
4310 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4311 
4312 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4313 
4314 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4315 		qed_chain_consume(&qp->rq.pbl);
4316 	qedr_inc_sw_cons(&qp->rq);
4317 
4318 	return 1;
4319 }
4320 
4321 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4322 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4323 {
4324 	u16 cnt = 0;
4325 
4326 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4327 		/* fill WC */
4328 		wc->status = IB_WC_WR_FLUSH_ERR;
4329 		wc->vendor_err = 0;
4330 		wc->wc_flags = 0;
4331 		wc->src_qp = qp->id;
4332 		wc->byte_len = 0;
4333 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4334 		wc->qp = &qp->ibqp;
4335 		num_entries--;
4336 		wc++;
4337 		cnt++;
4338 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4339 			qed_chain_consume(&qp->rq.pbl);
4340 		qedr_inc_sw_cons(&qp->rq);
4341 	}
4342 
4343 	return cnt;
4344 }
4345 
4346 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4347 				 struct rdma_cqe_responder *resp, int *update)
4348 {
4349 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4350 		consume_cqe(cq);
4351 		*update |= 1;
4352 	}
4353 }
4354 
/* Handle one SRQ responder CQE: build its work completion and then consume
 * the CQE unconditionally.
 *
 * Return: number of WCs written to @wc.
 */
static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
				 struct qedr_cq *cq, int num_entries,
				 struct ib_wc *wc,
				 struct rdma_cqe_responder *resp)
{
	int cnt = process_resp_one_srq(dev, qp, cq, wc, resp);

	consume_cqe(cq);

	return cnt;
}
4367 
4368 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4369 			     struct qedr_cq *cq, int num_entries,
4370 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4371 			     int *update)
4372 {
4373 	int cnt;
4374 
4375 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4376 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4377 					 resp->rq_cons_or_srq_id);
4378 		try_consume_resp_cqe(cq, qp, resp, update);
4379 	} else {
4380 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4381 		consume_cqe(cq);
4382 		*update |= 1;
4383 	}
4384 
4385 	return cnt;
4386 }
4387 
4388 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4389 				struct rdma_cqe_requester *req, int *update)
4390 {
4391 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4392 		consume_cqe(cq);
4393 		*update |= 1;
4394 	}
4395 }
4396 
4397 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4398 {
4399 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4400 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4401 	union rdma_cqe *cqe;
4402 	u32 old_cons, new_cons;
4403 	unsigned long flags;
4404 	int update = 0;
4405 	int done = 0;
4406 
4407 	if (cq->destroyed) {
4408 		DP_ERR(dev,
4409 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4410 		       cq, cq->icid);
4411 		return 0;
4412 	}
4413 
4414 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4415 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4416 
4417 	spin_lock_irqsave(&cq->cq_lock, flags);
4418 	cqe = cq->latest_cqe;
4419 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4420 	while (num_entries && is_valid_cqe(cq, cqe)) {
4421 		struct qedr_qp *qp;
4422 		int cnt = 0;
4423 
4424 		/* prevent speculative reads of any field of CQE */
4425 		rmb();
4426 
4427 		qp = cqe_get_qp(cqe);
4428 		if (!qp) {
4429 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4430 			break;
4431 		}
4432 
4433 		wc->qp = &qp->ibqp;
4434 
4435 		switch (cqe_get_type(cqe)) {
4436 		case RDMA_CQE_TYPE_REQUESTER:
4437 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4438 					       &cqe->req);
4439 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4440 			break;
4441 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4442 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4443 						&cqe->resp, &update);
4444 			break;
4445 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4446 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4447 						    wc, &cqe->resp);
4448 			update = 1;
4449 			break;
4450 		case RDMA_CQE_TYPE_INVALID:
4451 		default:
4452 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4453 			       cqe_get_type(cqe));
4454 		}
4455 		num_entries -= cnt;
4456 		wc += cnt;
4457 		done += cnt;
4458 
4459 		cqe = get_cqe(cq);
4460 	}
4461 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4462 
4463 	cq->cq_cons += new_cons - old_cons;
4464 
4465 	if (update)
4466 		/* doorbell notifies abount latest VALID entry,
4467 		 * but chain already point to the next INVALID one
4468 		 */
4469 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4470 
4471 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4472 	return done;
4473 }
4474 
4475 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4476 		     u32 port_num, const struct ib_wc *in_wc,
4477 		     const struct ib_grh *in_grh, const struct ib_mad *in,
4478 		     struct ib_mad *out_mad, size_t *out_mad_size,
4479 		     u16 *out_mad_pkey_index)
4480 {
4481 	return IB_MAD_RESULT_SUCCESS;
4482 }
4483