xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision e533cda12d8f0e7936354bafdc85c81741f805d2)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
55 
/* Size in bytes of a single SRQ element. */
#define QEDR_SRQ_WQE_ELEM_SIZE	(sizeof(union rdma_srq_elm))
/* Number of scatter/gather entries allowed per SRQ work request. */
#define RDMA_MAX_SGE_PER_SRQ	(4)
/* SGEs plus one additional element per SRQ work request. */
#define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)

/* Shift a doorbell offset left by DB_PWM_ADDR_OFFSET_SHIFT. */
#define DB_ADDR_SHIFT(addr)	((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

/* Values stored in qedr_user_mmap_entry::mmap_flag. */
enum {
	/* I/O address, mapped write-combined by qedr_mmap() */
	QEDR_USER_MMAP_IO_WC		= 0,
	/* kernel page, inserted into the VMA by qedr_mmap() */
	QEDR_USER_MMAP_PHYS_PAGE	= 1,
};
66 
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
68 					size_t len)
69 {
70 	size_t min_len = min_t(size_t, len, udata->outlen);
71 
72 	return ib_copy_to_udata(udata, src, min_len);
73 }
74 
75 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
76 {
77 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
78 		return -EINVAL;
79 
80 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
81 	return 0;
82 }
83 
84 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
85 		      int index, union ib_gid *sgid)
86 {
87 	struct qedr_dev *dev = get_qedr_dev(ibdev);
88 
89 	memset(sgid->raw, 0, sizeof(sgid->raw));
90 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
91 
92 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 		 sgid->global.interface_id, sgid->global.subnet_prefix);
94 
95 	return 0;
96 }
97 
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
99 {
100 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 	struct qedr_device_attr *qattr = &dev->attr;
102 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
103 
104 	srq_attr->srq_limit = srq->srq_limit;
105 	srq_attr->max_wr = qattr->max_srq_wr;
106 	srq_attr->max_sge = qattr->max_sge;
107 
108 	return 0;
109 }
110 
111 int qedr_query_device(struct ib_device *ibdev,
112 		      struct ib_device_attr *attr, struct ib_udata *udata)
113 {
114 	struct qedr_dev *dev = get_qedr_dev(ibdev);
115 	struct qedr_device_attr *qattr = &dev->attr;
116 
117 	if (!dev->rdma_ctx) {
118 		DP_ERR(dev,
119 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
120 		       dev->rdma_ctx);
121 		return -EINVAL;
122 	}
123 
124 	memset(attr, 0, sizeof(*attr));
125 
126 	attr->fw_ver = qattr->fw_ver;
127 	attr->sys_image_guid = qattr->sys_image_guid;
128 	attr->max_mr_size = qattr->max_mr_size;
129 	attr->page_size_cap = qattr->page_size_caps;
130 	attr->vendor_id = qattr->vendor_id;
131 	attr->vendor_part_id = qattr->vendor_part_id;
132 	attr->hw_ver = qattr->hw_ver;
133 	attr->max_qp = qattr->max_qp;
134 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 	    IB_DEVICE_RC_RNR_NAK_GEN |
137 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
138 
139 	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
140 		attr->device_cap_flags |= IB_DEVICE_XRC;
141 	attr->max_send_sge = qattr->max_sge;
142 	attr->max_recv_sge = qattr->max_sge;
143 	attr->max_sge_rd = qattr->max_sge;
144 	attr->max_cq = qattr->max_cq;
145 	attr->max_cqe = qattr->max_cqe;
146 	attr->max_mr = qattr->max_mr;
147 	attr->max_mw = qattr->max_mw;
148 	attr->max_pd = qattr->max_pd;
149 	attr->atomic_cap = dev->atomic_cap;
150 	attr->max_qp_init_rd_atom =
151 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
152 	attr->max_qp_rd_atom =
153 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
154 		attr->max_qp_init_rd_atom);
155 
156 	attr->max_srq = qattr->max_srq;
157 	attr->max_srq_sge = qattr->max_srq_sge;
158 	attr->max_srq_wr = qattr->max_srq_wr;
159 
160 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
161 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
162 	attr->max_pkeys = qattr->max_pkey;
163 	attr->max_ah = qattr->max_ah;
164 
165 	return 0;
166 }
167 
168 static inline void get_link_speed_and_width(int speed, u16 *ib_speed,
169 					    u8 *ib_width)
170 {
171 	switch (speed) {
172 	case 1000:
173 		*ib_speed = IB_SPEED_SDR;
174 		*ib_width = IB_WIDTH_1X;
175 		break;
176 	case 10000:
177 		*ib_speed = IB_SPEED_QDR;
178 		*ib_width = IB_WIDTH_1X;
179 		break;
180 
181 	case 20000:
182 		*ib_speed = IB_SPEED_DDR;
183 		*ib_width = IB_WIDTH_4X;
184 		break;
185 
186 	case 25000:
187 		*ib_speed = IB_SPEED_EDR;
188 		*ib_width = IB_WIDTH_1X;
189 		break;
190 
191 	case 40000:
192 		*ib_speed = IB_SPEED_QDR;
193 		*ib_width = IB_WIDTH_4X;
194 		break;
195 
196 	case 50000:
197 		*ib_speed = IB_SPEED_HDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 100000:
202 		*ib_speed = IB_SPEED_EDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	default:
207 		/* Unsupported */
208 		*ib_speed = IB_SPEED_SDR;
209 		*ib_width = IB_WIDTH_1X;
210 	}
211 }
212 
213 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
214 {
215 	struct qedr_dev *dev;
216 	struct qed_rdma_port *rdma_port;
217 
218 	dev = get_qedr_dev(ibdev);
219 
220 	if (!dev->rdma_ctx) {
221 		DP_ERR(dev, "rdma_ctx is NULL\n");
222 		return -EINVAL;
223 	}
224 
225 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
226 
227 	/* *attr being zeroed by the caller, avoid zeroing it here */
228 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
229 		attr->state = IB_PORT_ACTIVE;
230 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
231 	} else {
232 		attr->state = IB_PORT_DOWN;
233 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
234 	}
235 	attr->max_mtu = IB_MTU_4096;
236 	attr->lid = 0;
237 	attr->lmc = 0;
238 	attr->sm_lid = 0;
239 	attr->sm_sl = 0;
240 	attr->ip_gids = true;
241 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
242 		attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu);
243 		attr->gid_tbl_len = 1;
244 	} else {
245 		attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
246 		attr->gid_tbl_len = QEDR_MAX_SGID;
247 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
248 	}
249 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
250 	attr->qkey_viol_cntr = 0;
251 	get_link_speed_and_width(rdma_port->link_speed,
252 				 &attr->active_speed, &attr->active_width);
253 	attr->max_msg_sz = rdma_port->max_msg_size;
254 	attr->max_vl_num = 4;
255 
256 	return 0;
257 }
258 
259 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
260 {
261 	struct ib_device *ibdev = uctx->device;
262 	int rc;
263 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
264 	struct qedr_alloc_ucontext_resp uresp = {};
265 	struct qedr_alloc_ucontext_req ureq = {};
266 	struct qedr_dev *dev = get_qedr_dev(ibdev);
267 	struct qed_rdma_add_user_out_params oparams;
268 	struct qedr_user_mmap_entry *entry;
269 
270 	if (!udata)
271 		return -EFAULT;
272 
273 	if (udata->inlen) {
274 		rc = ib_copy_from_udata(&ureq, udata,
275 					min(sizeof(ureq), udata->inlen));
276 		if (rc) {
277 			DP_ERR(dev, "Problem copying data from user space\n");
278 			return -EFAULT;
279 		}
280 		ctx->edpm_mode = !!(ureq.context_flags &
281 				    QEDR_ALLOC_UCTX_EDPM_MODE);
282 		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
283 	}
284 
285 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
286 	if (rc) {
287 		DP_ERR(dev,
288 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
289 		       rc);
290 		return rc;
291 	}
292 
293 	ctx->dpi = oparams.dpi;
294 	ctx->dpi_addr = oparams.dpi_addr;
295 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
296 	ctx->dpi_size = oparams.dpi_size;
297 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
298 	if (!entry) {
299 		rc = -ENOMEM;
300 		goto err;
301 	}
302 
303 	entry->io_address = ctx->dpi_phys_addr;
304 	entry->length = ctx->dpi_size;
305 	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
306 	entry->dpi = ctx->dpi;
307 	entry->dev = dev;
308 	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
309 					 ctx->dpi_size);
310 	if (rc) {
311 		kfree(entry);
312 		goto err;
313 	}
314 	ctx->db_mmap_entry = &entry->rdma_entry;
315 
316 	if (!dev->user_dpm_enabled)
317 		uresp.dpm_flags = 0;
318 	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
319 		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
320 	else
321 		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
322 				  QEDR_DPM_TYPE_ROCE_LEGACY |
323 				  QEDR_DPM_TYPE_ROCE_EDPM_MODE;
324 
325 	if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
326 		uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
327 		uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
328 		uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
329 		uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
330 	}
331 
332 	uresp.wids_enabled = 1;
333 	uresp.wid_count = oparams.wid_count;
334 	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
335 	uresp.db_size = ctx->dpi_size;
336 	uresp.max_send_wr = dev->attr.max_sqe;
337 	uresp.max_recv_wr = dev->attr.max_rqe;
338 	uresp.max_srq_wr = dev->attr.max_srq_wr;
339 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
340 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
341 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
342 	uresp.max_cqes = QEDR_MAX_CQES;
343 
344 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
345 	if (rc)
346 		goto err;
347 
348 	ctx->dev = dev;
349 
350 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
351 		 &ctx->ibucontext);
352 	return 0;
353 
354 err:
355 	if (!ctx->db_mmap_entry)
356 		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
357 	else
358 		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
359 
360 	return rc;
361 }
362 
363 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
364 {
365 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
366 
367 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
368 		 uctx);
369 
370 	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
371 }
372 
373 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
374 {
375 	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
376 	struct qedr_dev *dev = entry->dev;
377 
378 	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
379 		free_page((unsigned long)entry->address);
380 	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
381 		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
382 
383 	kfree(entry);
384 }
385 
386 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
387 {
388 	struct ib_device *dev = ucontext->device;
389 	size_t length = vma->vm_end - vma->vm_start;
390 	struct rdma_user_mmap_entry *rdma_entry;
391 	struct qedr_user_mmap_entry *entry;
392 	int rc = 0;
393 	u64 pfn;
394 
395 	ibdev_dbg(dev,
396 		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
397 		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
398 
399 	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
400 	if (!rdma_entry) {
401 		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
402 			  vma->vm_pgoff);
403 		return -EINVAL;
404 	}
405 	entry = get_qedr_mmap_entry(rdma_entry);
406 	ibdev_dbg(dev,
407 		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
408 		  entry->io_address, length, entry->mmap_flag);
409 
410 	switch (entry->mmap_flag) {
411 	case QEDR_USER_MMAP_IO_WC:
412 		pfn = entry->io_address >> PAGE_SHIFT;
413 		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
414 				       pgprot_writecombine(vma->vm_page_prot),
415 				       rdma_entry);
416 		break;
417 	case QEDR_USER_MMAP_PHYS_PAGE:
418 		rc = vm_insert_page(vma, vma->vm_start,
419 				    virt_to_page(entry->address));
420 		break;
421 	default:
422 		rc = -EINVAL;
423 	}
424 
425 	if (rc)
426 		ibdev_dbg(dev,
427 			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
428 			  entry->io_address, length, entry->mmap_flag, rc);
429 
430 	rdma_user_mmap_entry_put(rdma_entry);
431 	return rc;
432 }
433 
434 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
435 {
436 	struct ib_device *ibdev = ibpd->device;
437 	struct qedr_dev *dev = get_qedr_dev(ibdev);
438 	struct qedr_pd *pd = get_qedr_pd(ibpd);
439 	u16 pd_id;
440 	int rc;
441 
442 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
443 		 udata ? "User Lib" : "Kernel");
444 
445 	if (!dev->rdma_ctx) {
446 		DP_ERR(dev, "invalid RDMA context\n");
447 		return -EINVAL;
448 	}
449 
450 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
451 	if (rc)
452 		return rc;
453 
454 	pd->pd_id = pd_id;
455 
456 	if (udata) {
457 		struct qedr_alloc_pd_uresp uresp = {
458 			.pd_id = pd_id,
459 		};
460 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
461 			udata, struct qedr_ucontext, ibucontext);
462 
463 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
464 		if (rc) {
465 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
466 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
467 			return rc;
468 		}
469 
470 		pd->uctx = context;
471 		pd->uctx->pd = pd;
472 	}
473 
474 	return 0;
475 }
476 
477 int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
478 {
479 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
480 	struct qedr_pd *pd = get_qedr_pd(ibpd);
481 
482 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
483 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
484 	return 0;
485 }
486 
487 
488 int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
489 {
490 	struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
491 	struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd);
492 
493 	return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id);
494 }
495 
496 int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
497 {
498 	struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
499 	u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id;
500 
501 	dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id);
502 	return 0;
503 }
504 static void qedr_free_pbl(struct qedr_dev *dev,
505 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
506 {
507 	struct pci_dev *pdev = dev->pdev;
508 	int i;
509 
510 	for (i = 0; i < pbl_info->num_pbls; i++) {
511 		if (!pbl[i].va)
512 			continue;
513 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
514 				  pbl[i].va, pbl[i].pa);
515 	}
516 
517 	kfree(pbl);
518 }
519 
/* Smallest and largest PBL page sizes, in bytes, used by this file. */
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

/*
 * Number of 64-bit page-buffer entries that fit in a page of the given
 * size.  The argument and the whole expansion are parenthesized so the
 * macro stays correct for compound arguments and inside larger expressions.
 */
#define NUM_PBES_ON_PAGE(_page_size) ((_page_size) / sizeof(u64))
/* Capacity of a single, maximum-size PBL page. */
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
/* Capacity of a two-layer PBL built from maximum-size pages. */
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
526 
527 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
528 					   struct qedr_pbl_info *pbl_info,
529 					   gfp_t flags)
530 {
531 	struct pci_dev *pdev = dev->pdev;
532 	struct qedr_pbl *pbl_table;
533 	dma_addr_t *pbl_main_tbl;
534 	dma_addr_t pa;
535 	void *va;
536 	int i;
537 
538 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
539 	if (!pbl_table)
540 		return ERR_PTR(-ENOMEM);
541 
542 	for (i = 0; i < pbl_info->num_pbls; i++) {
543 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
544 					flags);
545 		if (!va)
546 			goto err;
547 
548 		pbl_table[i].va = va;
549 		pbl_table[i].pa = pa;
550 	}
551 
552 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
553 	 * the first one with physical pointers to all of the rest
554 	 */
555 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
556 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
557 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
558 
559 	return pbl_table;
560 
561 err:
562 	for (i--; i >= 0; i--)
563 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
564 				  pbl_table[i].va, pbl_table[i].pa);
565 
566 	qedr_free_pbl(dev, pbl_info, pbl_table);
567 
568 	return ERR_PTR(-ENOMEM);
569 }
570 
571 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
572 				struct qedr_pbl_info *pbl_info,
573 				u32 num_pbes, int two_layer_capable)
574 {
575 	u32 pbl_capacity;
576 	u32 pbl_size;
577 	u32 num_pbls;
578 
579 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
580 		if (num_pbes > MAX_PBES_TWO_LAYER) {
581 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
582 			       num_pbes);
583 			return -EINVAL;
584 		}
585 
586 		/* calculate required pbl page size */
587 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
588 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
589 			       NUM_PBES_ON_PAGE(pbl_size);
590 
591 		while (pbl_capacity < num_pbes) {
592 			pbl_size *= 2;
593 			pbl_capacity = pbl_size / sizeof(u64);
594 			pbl_capacity = pbl_capacity * pbl_capacity;
595 		}
596 
597 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
598 		num_pbls++;	/* One for the layer0 ( points to the pbls) */
599 		pbl_info->two_layered = true;
600 	} else {
601 		/* One layered PBL */
602 		num_pbls = 1;
603 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
604 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
605 		pbl_info->two_layered = false;
606 	}
607 
608 	pbl_info->num_pbls = num_pbls;
609 	pbl_info->pbl_size = pbl_size;
610 	pbl_info->num_pbes = num_pbes;
611 
612 	DP_DEBUG(dev, QEDR_MSG_MR,
613 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
614 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
615 
616 	return 0;
617 }
618 
619 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
620 			       struct qedr_pbl *pbl,
621 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
622 {
623 	int pbe_cnt, total_num_pbes = 0;
624 	struct qedr_pbl *pbl_tbl;
625 	struct ib_block_iter biter;
626 	struct regpair *pbe;
627 
628 	if (!pbl_info->num_pbes)
629 		return;
630 
631 	/* If we have a two layered pbl, the first pbl points to the rest
632 	 * of the pbls and the first entry lays on the second pbl in the table
633 	 */
634 	if (pbl_info->two_layered)
635 		pbl_tbl = &pbl[1];
636 	else
637 		pbl_tbl = pbl;
638 
639 	pbe = (struct regpair *)pbl_tbl->va;
640 	if (!pbe) {
641 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
642 		return;
643 	}
644 
645 	pbe_cnt = 0;
646 
647 	rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) {
648 		u64 pg_addr = rdma_block_iter_dma_address(&biter);
649 
650 		pbe->lo = cpu_to_le32(pg_addr);
651 		pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
652 
653 		pbe_cnt++;
654 		total_num_pbes++;
655 		pbe++;
656 
657 		if (total_num_pbes == pbl_info->num_pbes)
658 			return;
659 
660 		/* If the given pbl is full storing the pbes, move to next pbl.
661 		 */
662 		if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
663 			pbl_tbl++;
664 			pbe = (struct regpair *)pbl_tbl->va;
665 			pbe_cnt = 0;
666 		}
667 	}
668 }
669 
670 static int qedr_db_recovery_add(struct qedr_dev *dev,
671 				void __iomem *db_addr,
672 				void *db_data,
673 				enum qed_db_rec_width db_width,
674 				enum qed_db_rec_space db_space)
675 {
676 	if (!db_data) {
677 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
678 		return 0;
679 	}
680 
681 	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
682 						 db_width, db_space);
683 }
684 
685 static void qedr_db_recovery_del(struct qedr_dev *dev,
686 				 void __iomem *db_addr,
687 				 void *db_data)
688 {
689 	if (!db_data) {
690 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
691 		return;
692 	}
693 
694 	/* Ignore return code as there is not much we can do about it. Error
695 	 * log will be printed inside.
696 	 */
697 	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
698 }
699 
700 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
701 			      struct qedr_cq *cq, struct ib_udata *udata,
702 			      u32 db_offset)
703 {
704 	struct qedr_create_cq_uresp uresp;
705 	int rc;
706 
707 	memset(&uresp, 0, sizeof(uresp));
708 
709 	uresp.db_offset = db_offset;
710 	uresp.icid = cq->icid;
711 	if (cq->q.db_mmap_entry)
712 		uresp.db_rec_addr =
713 			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
714 
715 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
716 	if (rc)
717 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
718 
719 	return rc;
720 }
721 
722 static void consume_cqe(struct qedr_cq *cq)
723 {
724 	if (cq->latest_cqe == cq->toggle_cqe)
725 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
726 
727 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
728 }
729 
730 static inline int qedr_align_cq_entries(int entries)
731 {
732 	u64 size, aligned_size;
733 
734 	/* We allocate an extra entry that we don't report to the FW. */
735 	size = (entries + 1) * QEDR_CQE_SIZE;
736 	aligned_size = ALIGN(size, PAGE_SIZE);
737 
738 	return aligned_size / QEDR_CQE_SIZE;
739 }
740 
741 static int qedr_init_user_db_rec(struct ib_udata *udata,
742 				 struct qedr_dev *dev, struct qedr_userq *q,
743 				 bool requires_db_rec)
744 {
745 	struct qedr_ucontext *uctx =
746 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
747 					  ibucontext);
748 	struct qedr_user_mmap_entry *entry;
749 	int rc;
750 
751 	/* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */
752 	if (requires_db_rec == 0 || !uctx->db_rec)
753 		return 0;
754 
755 	/* Allocate a page for doorbell recovery, add to mmap */
756 	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
757 	if (!q->db_rec_data) {
758 		DP_ERR(dev, "get_zeroed_page failed\n");
759 		return -ENOMEM;
760 	}
761 
762 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
763 	if (!entry)
764 		goto err_free_db_data;
765 
766 	entry->address = q->db_rec_data;
767 	entry->length = PAGE_SIZE;
768 	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
769 	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
770 					 &entry->rdma_entry,
771 					 PAGE_SIZE);
772 	if (rc)
773 		goto err_free_entry;
774 
775 	q->db_mmap_entry = &entry->rdma_entry;
776 
777 	return 0;
778 
779 err_free_entry:
780 	kfree(entry);
781 
782 err_free_db_data:
783 	free_page((unsigned long)q->db_rec_data);
784 	q->db_rec_data = NULL;
785 	return -ENOMEM;
786 }
787 
788 static inline int qedr_init_user_queue(struct ib_udata *udata,
789 				       struct qedr_dev *dev,
790 				       struct qedr_userq *q, u64 buf_addr,
791 				       size_t buf_len, bool requires_db_rec,
792 				       int access,
793 				       int alloc_and_init)
794 {
795 	u32 fw_pages;
796 	int rc;
797 
798 	q->buf_addr = buf_addr;
799 	q->buf_len = buf_len;
800 	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
801 	if (IS_ERR(q->umem)) {
802 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
803 		       PTR_ERR(q->umem));
804 		return PTR_ERR(q->umem);
805 	}
806 
807 	fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
808 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
809 	if (rc)
810 		goto err0;
811 
812 	if (alloc_and_init) {
813 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
814 		if (IS_ERR(q->pbl_tbl)) {
815 			rc = PTR_ERR(q->pbl_tbl);
816 			goto err0;
817 		}
818 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
819 				   FW_PAGE_SHIFT);
820 	} else {
821 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
822 		if (!q->pbl_tbl) {
823 			rc = -ENOMEM;
824 			goto err0;
825 		}
826 	}
827 
828 	/* mmap the user address used to store doorbell data for recovery */
829 	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
830 
831 err0:
832 	ib_umem_release(q->umem);
833 	q->umem = NULL;
834 
835 	return rc;
836 }
837 
838 static inline void qedr_init_cq_params(struct qedr_cq *cq,
839 				       struct qedr_ucontext *ctx,
840 				       struct qedr_dev *dev, int vector,
841 				       int chain_entries, int page_cnt,
842 				       u64 pbl_ptr,
843 				       struct qed_rdma_create_cq_in_params
844 				       *params)
845 {
846 	memset(params, 0, sizeof(*params));
847 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
848 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
849 	params->cnq_id = vector;
850 	params->cq_size = chain_entries - 1;
851 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
852 	params->pbl_num_pages = page_cnt;
853 	params->pbl_ptr = pbl_ptr;
854 	params->pbl_two_level = 0;
855 }
856 
857 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
858 {
859 	cq->db.data.agg_flags = flags;
860 	cq->db.data.value = cpu_to_le32(cons);
861 	writeq(cq->db.raw, cq->db_addr);
862 }
863 
864 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
865 {
866 	struct qedr_cq *cq = get_qedr_cq(ibcq);
867 	unsigned long sflags;
868 	struct qedr_dev *dev;
869 
870 	dev = get_qedr_dev(ibcq->device);
871 
872 	if (cq->destroyed) {
873 		DP_ERR(dev,
874 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
875 		       cq, cq->icid);
876 		return -EINVAL;
877 	}
878 
879 
880 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
881 		return 0;
882 
883 	spin_lock_irqsave(&cq->cq_lock, sflags);
884 
885 	cq->arm_flags = 0;
886 
887 	if (flags & IB_CQ_SOLICITED)
888 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
889 
890 	if (flags & IB_CQ_NEXT_COMP)
891 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
892 
893 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
894 
895 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
896 
897 	return 0;
898 }
899 
900 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
901 		   struct ib_udata *udata)
902 {
903 	struct ib_device *ibdev = ibcq->device;
904 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
905 		udata, struct qedr_ucontext, ibucontext);
906 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
907 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
908 	struct qed_chain_init_params chain_params = {
909 		.mode		= QED_CHAIN_MODE_PBL,
910 		.intended_use	= QED_CHAIN_USE_TO_CONSUME,
911 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
912 		.elem_size	= sizeof(union rdma_cqe),
913 	};
914 	struct qedr_dev *dev = get_qedr_dev(ibdev);
915 	struct qed_rdma_create_cq_in_params params;
916 	struct qedr_create_cq_ureq ureq = {};
917 	int vector = attr->comp_vector;
918 	int entries = attr->cqe;
919 	struct qedr_cq *cq = get_qedr_cq(ibcq);
920 	int chain_entries;
921 	u32 db_offset;
922 	int page_cnt;
923 	u64 pbl_ptr;
924 	u16 icid;
925 	int rc;
926 
927 	DP_DEBUG(dev, QEDR_MSG_INIT,
928 		 "create_cq: called from %s. entries=%d, vector=%d\n",
929 		 udata ? "User Lib" : "Kernel", entries, vector);
930 
931 	if (entries > QEDR_MAX_CQES) {
932 		DP_ERR(dev,
933 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
934 		       entries, QEDR_MAX_CQES);
935 		return -EINVAL;
936 	}
937 
938 	chain_entries = qedr_align_cq_entries(entries);
939 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
940 	chain_params.num_elems = chain_entries;
941 
942 	/* calc db offset. user will add DPI base, kernel will add db addr */
943 	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
944 
945 	if (udata) {
946 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
947 							 udata->inlen))) {
948 			DP_ERR(dev,
949 			       "create cq: problem copying data from user space\n");
950 			goto err0;
951 		}
952 
953 		if (!ureq.len) {
954 			DP_ERR(dev,
955 			       "create cq: cannot create a cq with 0 entries\n");
956 			goto err0;
957 		}
958 
959 		cq->cq_type = QEDR_CQ_TYPE_USER;
960 
961 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
962 					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
963 					  1);
964 		if (rc)
965 			goto err0;
966 
967 		pbl_ptr = cq->q.pbl_tbl->pa;
968 		page_cnt = cq->q.pbl_info.num_pbes;
969 
970 		cq->ibcq.cqe = chain_entries;
971 		cq->q.db_addr = ctx->dpi_addr + db_offset;
972 	} else {
973 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
974 
975 		rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
976 						   &chain_params);
977 		if (rc)
978 			goto err0;
979 
980 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
981 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
982 		cq->ibcq.cqe = cq->pbl.capacity;
983 	}
984 
985 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
986 			    pbl_ptr, &params);
987 
988 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
989 	if (rc)
990 		goto err1;
991 
992 	cq->icid = icid;
993 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
994 	spin_lock_init(&cq->cq_lock);
995 
996 	if (udata) {
997 		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
998 		if (rc)
999 			goto err2;
1000 
1001 		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
1002 					  &cq->q.db_rec_data->db_data,
1003 					  DB_REC_WIDTH_64B,
1004 					  DB_REC_USER);
1005 		if (rc)
1006 			goto err2;
1007 
1008 	} else {
1009 		/* Generate doorbell address. */
1010 		cq->db.data.icid = cq->icid;
1011 		cq->db_addr = dev->db_addr + db_offset;
1012 		cq->db.data.params = DB_AGG_CMD_MAX <<
1013 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1014 
1015 		/* point to the very last element, passing it we will toggle */
1016 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1017 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1018 		cq->latest_cqe = NULL;
1019 		consume_cqe(cq);
1020 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1021 
1022 		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1023 					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
1024 		if (rc)
1025 			goto err2;
1026 	}
1027 
1028 	DP_DEBUG(dev, QEDR_MSG_CQ,
1029 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1030 		 cq->icid, cq, params.cq_size);
1031 
1032 	return 0;
1033 
1034 err2:
1035 	destroy_iparams.icid = cq->icid;
1036 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1037 				  &destroy_oparams);
1038 err1:
1039 	if (udata) {
1040 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1041 		ib_umem_release(cq->q.umem);
1042 		if (cq->q.db_mmap_entry)
1043 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1044 	} else {
1045 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1046 	}
1047 err0:
1048 	return -EINVAL;
1049 }
1050 
1051 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1052 {
1053 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1054 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1055 
1056 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1057 
1058 	return 0;
1059 }
1060 
1061 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1062 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1063 
1064 int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1065 {
1066 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1067 	struct qed_rdma_destroy_cq_out_params oparams;
1068 	struct qed_rdma_destroy_cq_in_params iparams;
1069 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1070 	int iter;
1071 
1072 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1073 
1074 	cq->destroyed = 1;
1075 
1076 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
1077 	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1078 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1079 		return 0;
1080 	}
1081 
1082 	iparams.icid = cq->icid;
1083 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1084 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1085 
1086 	if (udata) {
1087 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1088 		ib_umem_release(cq->q.umem);
1089 
1090 		if (cq->q.db_rec_data) {
1091 			qedr_db_recovery_del(dev, cq->q.db_addr,
1092 					     &cq->q.db_rec_data->db_data);
1093 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1094 		}
1095 	} else {
1096 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1097 	}
1098 
1099 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1100 	 * wait until all CNQ interrupts, if any, are received. This will always
1101 	 * happen and will always happen very fast. If not, then a serious error
1102 	 * has occured. That is why we can use a long delay.
1103 	 * We spin for a short time so we don’t lose time on context switching
1104 	 * in case all the completions are handled in that span. Otherwise
1105 	 * we sleep for a while and check again. Since the CNQ may be
1106 	 * associated with (only) the current CPU we use msleep to allow the
1107 	 * current CPU to be freed.
1108 	 * The CNQ notification is increased in qedr_irq_handler().
1109 	 */
1110 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1111 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1112 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1113 		iter--;
1114 	}
1115 
1116 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1117 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1118 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1119 		iter--;
1120 	}
1121 
1122 	/* Note that we don't need to have explicit code to wait for the
1123 	 * completion of the event handler because it is invoked from the EQ.
1124 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1125 	 * be certain that there's no event handler in process.
1126 	 */
1127 	return 0;
1128 }
1129 
1130 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1131 					  struct ib_qp_attr *attr,
1132 					  int attr_mask,
1133 					  struct qed_rdma_modify_qp_in_params
1134 					  *qp_params)
1135 {
1136 	const struct ib_gid_attr *gid_attr;
1137 	enum rdma_network_type nw_type;
1138 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1139 	u32 ipv4_addr;
1140 	int ret;
1141 	int i;
1142 
1143 	gid_attr = grh->sgid_attr;
1144 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1145 	if (ret)
1146 		return ret;
1147 
1148 	nw_type = rdma_gid_attr_network_type(gid_attr);
1149 	switch (nw_type) {
1150 	case RDMA_NETWORK_IPV6:
1151 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1152 		       sizeof(qp_params->sgid));
1153 		memcpy(&qp_params->dgid.bytes[0],
1154 		       &grh->dgid,
1155 		       sizeof(qp_params->dgid));
1156 		qp_params->roce_mode = ROCE_V2_IPV6;
1157 		SET_FIELD(qp_params->modify_flags,
1158 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1159 		break;
1160 	case RDMA_NETWORK_ROCE_V1:
1161 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1162 		       sizeof(qp_params->sgid));
1163 		memcpy(&qp_params->dgid.bytes[0],
1164 		       &grh->dgid,
1165 		       sizeof(qp_params->dgid));
1166 		qp_params->roce_mode = ROCE_V1;
1167 		break;
1168 	case RDMA_NETWORK_IPV4:
1169 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1170 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1171 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1172 		qp_params->sgid.ipv4_addr = ipv4_addr;
1173 		ipv4_addr =
1174 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1175 		qp_params->dgid.ipv4_addr = ipv4_addr;
1176 		SET_FIELD(qp_params->modify_flags,
1177 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1178 		qp_params->roce_mode = ROCE_V2_IPV4;
1179 		break;
1180 	default:
1181 		return -EINVAL;
1182 	}
1183 
1184 	for (i = 0; i < 4; i++) {
1185 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1186 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1187 	}
1188 
1189 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1190 		qp_params->vlan_id = 0;
1191 
1192 	return 0;
1193 }
1194 
1195 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1196 			       struct ib_qp_init_attr *attrs,
1197 			       struct ib_udata *udata)
1198 {
1199 	struct qedr_device_attr *qattr = &dev->attr;
1200 
1201 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1202 	if (attrs->qp_type != IB_QPT_RC &&
1203 	    attrs->qp_type != IB_QPT_GSI &&
1204 	    attrs->qp_type != IB_QPT_XRC_INI &&
1205 	    attrs->qp_type != IB_QPT_XRC_TGT) {
1206 		DP_DEBUG(dev, QEDR_MSG_QP,
1207 			 "create qp: unsupported qp type=0x%x requested\n",
1208 			 attrs->qp_type);
1209 		return -EOPNOTSUPP;
1210 	}
1211 
1212 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1213 		DP_ERR(dev,
1214 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1215 		       attrs->cap.max_send_wr, qattr->max_sqe);
1216 		return -EINVAL;
1217 	}
1218 
1219 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1220 		DP_ERR(dev,
1221 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1222 		       attrs->cap.max_inline_data, qattr->max_inline);
1223 		return -EINVAL;
1224 	}
1225 
1226 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1227 		DP_ERR(dev,
1228 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1229 		       attrs->cap.max_send_sge, qattr->max_sge);
1230 		return -EINVAL;
1231 	}
1232 
1233 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1234 		DP_ERR(dev,
1235 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1236 		       attrs->cap.max_recv_sge, qattr->max_sge);
1237 		return -EINVAL;
1238 	}
1239 
1240 	/* verify consumer QPs are not trying to use GSI QP's CQ.
1241 	 * TGT QP isn't associated with RQ/SQ
1242 	 */
1243 	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
1244 	    (attrs->qp_type != IB_QPT_XRC_TGT)) {
1245 		struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
1246 		struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
1247 
1248 		if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) ||
1249 		    (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) {
1250 			DP_ERR(dev,
1251 			       "create qp: consumer QP cannot use GSI CQs.\n");
1252 			return -EINVAL;
1253 		}
1254 	}
1255 
1256 	return 0;
1257 }
1258 
1259 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1260 			       struct qedr_srq *srq, struct ib_udata *udata)
1261 {
1262 	struct qedr_create_srq_uresp uresp = {};
1263 	int rc;
1264 
1265 	uresp.srq_id = srq->srq_id;
1266 
1267 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1268 	if (rc)
1269 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1270 
1271 	return rc;
1272 }
1273 
1274 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1275 			       struct qedr_create_qp_uresp *uresp,
1276 			       struct qedr_qp *qp)
1277 {
1278 	/* iWARP requires two doorbells per RQ. */
1279 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1280 		uresp->rq_db_offset =
1281 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1282 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1283 	} else {
1284 		uresp->rq_db_offset =
1285 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1286 	}
1287 
1288 	uresp->rq_icid = qp->icid;
1289 	if (qp->urq.db_mmap_entry)
1290 		uresp->rq_db_rec_addr =
1291 			rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
1292 }
1293 
1294 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1295 			       struct qedr_create_qp_uresp *uresp,
1296 			       struct qedr_qp *qp)
1297 {
1298 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1299 
1300 	/* iWARP uses the same cid for rq and sq */
1301 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1302 		uresp->sq_icid = qp->icid;
1303 	else
1304 		uresp->sq_icid = qp->icid + 1;
1305 
1306 	if (qp->usq.db_mmap_entry)
1307 		uresp->sq_db_rec_addr =
1308 			rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
1309 }
1310 
1311 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1312 			      struct qedr_qp *qp, struct ib_udata *udata,
1313 			      struct qedr_create_qp_uresp *uresp)
1314 {
1315 	int rc;
1316 
1317 	memset(uresp, 0, sizeof(*uresp));
1318 
1319 	if (qedr_qp_has_sq(qp))
1320 		qedr_copy_sq_uresp(dev, uresp, qp);
1321 
1322 	if (qedr_qp_has_rq(qp))
1323 		qedr_copy_rq_uresp(dev, uresp, qp);
1324 
1325 	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1326 	uresp->qp_id = qp->qp_id;
1327 
1328 	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1329 	if (rc)
1330 		DP_ERR(dev,
1331 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1332 		       qp->icid);
1333 
1334 	return rc;
1335 }
1336 
1337 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1338 				      struct qedr_qp *qp,
1339 				      struct qedr_pd *pd,
1340 				      struct ib_qp_init_attr *attrs)
1341 {
1342 	spin_lock_init(&qp->q_lock);
1343 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1344 		kref_init(&qp->refcnt);
1345 		init_completion(&qp->iwarp_cm_comp);
1346 	}
1347 
1348 	qp->pd = pd;
1349 	qp->qp_type = attrs->qp_type;
1350 	qp->max_inline_data = attrs->cap.max_inline_data;
1351 	qp->state = QED_ROCE_QP_STATE_RESET;
1352 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1353 	qp->dev = dev;
1354 	if (qedr_qp_has_sq(qp)) {
1355 		qp->sq.max_sges = attrs->cap.max_send_sge;
1356 		qp->sq_cq = get_qedr_cq(attrs->send_cq);
1357 		DP_DEBUG(dev, QEDR_MSG_QP,
1358 			 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1359 			 qp->sq.max_sges, qp->sq_cq->icid);
1360 	}
1361 
1362 	if (attrs->srq)
1363 		qp->srq = get_qedr_srq(attrs->srq);
1364 
1365 	if (qedr_qp_has_rq(qp)) {
1366 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1367 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1368 		DP_DEBUG(dev, QEDR_MSG_QP,
1369 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1370 			 qp->rq.max_sges, qp->rq_cq->icid);
1371 	}
1372 
1373 	DP_DEBUG(dev, QEDR_MSG_QP,
1374 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1375 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1376 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1377 	DP_DEBUG(dev, QEDR_MSG_QP,
1378 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1379 		 qp->sq.max_sges, qp->sq_cq->icid);
1380 }
1381 
1382 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1383 {
1384 	int rc = 0;
1385 
1386 	if (qedr_qp_has_sq(qp)) {
1387 		qp->sq.db = dev->db_addr +
1388 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1389 		qp->sq.db_data.data.icid = qp->icid + 1;
1390 		rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data,
1391 					  DB_REC_WIDTH_32B, DB_REC_KERNEL);
1392 		if (rc)
1393 			return rc;
1394 	}
1395 
1396 	if (qedr_qp_has_rq(qp)) {
1397 		qp->rq.db = dev->db_addr +
1398 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1399 		qp->rq.db_data.data.icid = qp->icid;
1400 		rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data,
1401 					  DB_REC_WIDTH_32B, DB_REC_KERNEL);
1402 		if (rc && qedr_qp_has_sq(qp))
1403 			qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
1404 	}
1405 
1406 	return rc;
1407 }
1408 
1409 static int qedr_check_srq_params(struct qedr_dev *dev,
1410 				 struct ib_srq_init_attr *attrs,
1411 				 struct ib_udata *udata)
1412 {
1413 	struct qedr_device_attr *qattr = &dev->attr;
1414 
1415 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1416 		DP_ERR(dev,
1417 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1418 		       attrs->attr.max_wr, qattr->max_srq_wr);
1419 		return -EINVAL;
1420 	}
1421 
1422 	if (attrs->attr.max_sge > qattr->max_sge) {
1423 		DP_ERR(dev,
1424 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1425 		       attrs->attr.max_sge, qattr->max_sge);
1426 	}
1427 
1428 	if (!udata && attrs->srq_type == IB_SRQT_XRC) {
1429 		DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n");
1430 		return -EINVAL;
1431 	}
1432 
1433 	return 0;
1434 }
1435 
1436 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1437 {
1438 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1439 	ib_umem_release(srq->usrq.umem);
1440 	ib_umem_release(srq->prod_umem);
1441 }
1442 
1443 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1444 {
1445 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1446 	struct qedr_dev *dev = srq->dev;
1447 
1448 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1449 
1450 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1451 			  hw_srq->virt_prod_pair_addr,
1452 			  hw_srq->phy_prod_pair_addr);
1453 }
1454 
1455 static int qedr_init_srq_user_params(struct ib_udata *udata,
1456 				     struct qedr_srq *srq,
1457 				     struct qedr_create_srq_ureq *ureq,
1458 				     int access)
1459 {
1460 	struct scatterlist *sg;
1461 	int rc;
1462 
1463 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1464 				  ureq->srq_len, false, access, 1);
1465 	if (rc)
1466 		return rc;
1467 
1468 	srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
1469 				     sizeof(struct rdma_srq_producers), access);
1470 	if (IS_ERR(srq->prod_umem)) {
1471 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1472 		ib_umem_release(srq->usrq.umem);
1473 		DP_ERR(srq->dev,
1474 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1475 		       PTR_ERR(srq->prod_umem));
1476 		return PTR_ERR(srq->prod_umem);
1477 	}
1478 
1479 	sg = srq->prod_umem->sg_head.sgl;
1480 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1481 
1482 	return 0;
1483 }
1484 
1485 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1486 					struct qedr_dev *dev,
1487 					struct ib_srq_init_attr *init_attr)
1488 {
1489 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1490 	struct qed_chain_init_params params = {
1491 		.mode		= QED_CHAIN_MODE_PBL,
1492 		.intended_use	= QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1493 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1494 		.elem_size	= QEDR_SRQ_WQE_ELEM_SIZE,
1495 	};
1496 	dma_addr_t phy_prod_pair_addr;
1497 	u32 num_elems;
1498 	void *va;
1499 	int rc;
1500 
1501 	va = dma_alloc_coherent(&dev->pdev->dev,
1502 				sizeof(struct rdma_srq_producers),
1503 				&phy_prod_pair_addr, GFP_KERNEL);
1504 	if (!va) {
1505 		DP_ERR(dev,
1506 		       "create srq: failed to allocate dma memory for producer\n");
1507 		return -ENOMEM;
1508 	}
1509 
1510 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1511 	hw_srq->virt_prod_pair_addr = va;
1512 
1513 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1514 	params.num_elems = num_elems;
1515 
1516 	rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
1517 	if (rc)
1518 		goto err0;
1519 
1520 	hw_srq->num_elems = num_elems;
1521 
1522 	return 0;
1523 
1524 err0:
1525 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1526 			  va, phy_prod_pair_addr);
1527 	return rc;
1528 }
1529 
1530 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1531 		    struct ib_udata *udata)
1532 {
1533 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1534 	struct qed_rdma_create_srq_in_params in_params = {};
1535 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1536 	struct qed_rdma_create_srq_out_params out_params;
1537 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1538 	struct qedr_create_srq_ureq ureq = {};
1539 	u64 pbl_base_addr, phy_prod_pair_addr;
1540 	struct qedr_srq_hwq_info *hw_srq;
1541 	u32 page_cnt, page_size;
1542 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1543 	int rc = 0;
1544 
1545 	DP_DEBUG(dev, QEDR_MSG_QP,
1546 		 "create SRQ called from %s (pd %p)\n",
1547 		 (udata) ? "User lib" : "kernel", pd);
1548 
1549 	rc = qedr_check_srq_params(dev, init_attr, udata);
1550 	if (rc)
1551 		return -EINVAL;
1552 
1553 	srq->dev = dev;
1554 	srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
1555 	hw_srq = &srq->hw_srq;
1556 	spin_lock_init(&srq->lock);
1557 
1558 	hw_srq->max_wr = init_attr->attr.max_wr;
1559 	hw_srq->max_sges = init_attr->attr.max_sge;
1560 
1561 	if (udata) {
1562 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1563 							 udata->inlen))) {
1564 			DP_ERR(dev,
1565 			       "create srq: problem copying data from user space\n");
1566 			goto err0;
1567 		}
1568 
1569 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
1570 		if (rc)
1571 			goto err0;
1572 
1573 		page_cnt = srq->usrq.pbl_info.num_pbes;
1574 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1575 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1576 		page_size = PAGE_SIZE;
1577 	} else {
1578 		struct qed_chain *pbl;
1579 
1580 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1581 		if (rc)
1582 			goto err0;
1583 
1584 		pbl = &hw_srq->pbl;
1585 		page_cnt = qed_chain_get_page_cnt(pbl);
1586 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1587 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1588 		page_size = QED_CHAIN_PAGE_SIZE;
1589 	}
1590 
1591 	in_params.pd_id = pd->pd_id;
1592 	in_params.pbl_base_addr = pbl_base_addr;
1593 	in_params.prod_pair_addr = phy_prod_pair_addr;
1594 	in_params.num_pages = page_cnt;
1595 	in_params.page_size = page_size;
1596 	if (srq->is_xrc) {
1597 		struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd);
1598 		struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq);
1599 
1600 		in_params.is_xrc = 1;
1601 		in_params.xrcd_id = xrcd->xrcd_id;
1602 		in_params.cq_cid = cq->icid;
1603 	}
1604 
1605 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1606 	if (rc)
1607 		goto err1;
1608 
1609 	srq->srq_id = out_params.srq_id;
1610 
1611 	if (udata) {
1612 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1613 		if (rc)
1614 			goto err2;
1615 	}
1616 
1617 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1618 	if (rc)
1619 		goto err2;
1620 
1621 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1622 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1623 	return 0;
1624 
1625 err2:
1626 	destroy_in_params.srq_id = srq->srq_id;
1627 
1628 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1629 err1:
1630 	if (udata)
1631 		qedr_free_srq_user_params(srq);
1632 	else
1633 		qedr_free_srq_kernel_params(srq);
1634 err0:
1635 	return -EFAULT;
1636 }
1637 
1638 int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1639 {
1640 	struct qed_rdma_destroy_srq_in_params in_params = {};
1641 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1642 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1643 
1644 	xa_erase_irq(&dev->srqs, srq->srq_id);
1645 	in_params.srq_id = srq->srq_id;
1646 	in_params.is_xrc = srq->is_xrc;
1647 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1648 
1649 	if (ibsrq->uobject)
1650 		qedr_free_srq_user_params(srq);
1651 	else
1652 		qedr_free_srq_kernel_params(srq);
1653 
1654 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1655 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1656 		 srq->srq_id);
1657 	return 0;
1658 }
1659 
1660 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1661 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1662 {
1663 	struct qed_rdma_modify_srq_in_params in_params = {};
1664 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1665 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1666 	int rc;
1667 
1668 	if (attr_mask & IB_SRQ_MAX_WR) {
1669 		DP_ERR(dev,
1670 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1671 		       attr_mask, srq);
1672 		return -EINVAL;
1673 	}
1674 
1675 	if (attr_mask & IB_SRQ_LIMIT) {
1676 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1677 			DP_ERR(dev,
1678 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1679 			       attr->srq_limit, srq->hw_srq.max_wr);
1680 			return -EINVAL;
1681 		}
1682 
1683 		in_params.srq_id = srq->srq_id;
1684 		in_params.wqe_limit = attr->srq_limit;
1685 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1686 		if (rc)
1687 			return rc;
1688 	}
1689 
1690 	srq->srq_limit = attr->srq_limit;
1691 
1692 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1693 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1694 
1695 	return 0;
1696 }
1697 
1698 static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type)
1699 {
1700 	switch (ib_qp_type) {
1701 	case IB_QPT_RC:
1702 		return QED_RDMA_QP_TYPE_RC;
1703 	case IB_QPT_XRC_INI:
1704 		return QED_RDMA_QP_TYPE_XRC_INI;
1705 	case IB_QPT_XRC_TGT:
1706 		return QED_RDMA_QP_TYPE_XRC_TGT;
1707 	default:
1708 		return QED_RDMA_QP_TYPE_INVAL;
1709 	}
1710 }
1711 
1712 static inline void
1713 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1714 			      struct qedr_pd *pd,
1715 			      struct qedr_qp *qp,
1716 			      struct ib_qp_init_attr *attrs,
1717 			      bool fmr_and_reserved_lkey,
1718 			      struct qed_rdma_create_qp_in_params *params)
1719 {
1720 	/* QP handle to be written in an async event */
1721 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1722 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1723 
1724 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1725 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1726 	params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type);
1727 	params->stats_queue = 0;
1728 
1729 	if (pd) {
1730 		params->pd = pd->pd_id;
1731 		params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1732 	}
1733 
1734 	if (qedr_qp_has_sq(qp))
1735 		params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1736 
1737 	if (qedr_qp_has_rq(qp))
1738 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1739 
1740 	if (qedr_qp_has_srq(qp)) {
1741 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1742 		params->srq_id = qp->srq->srq_id;
1743 		params->use_srq = true;
1744 	} else {
1745 		params->srq_id = 0;
1746 		params->use_srq = false;
1747 	}
1748 }
1749 
1750 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1751 {
1752 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1753 		 "qp=%p. "
1754 		 "sq_addr=0x%llx, "
1755 		 "sq_len=%zd, "
1756 		 "rq_addr=0x%llx, "
1757 		 "rq_len=%zd"
1758 		 "\n",
1759 		 qp,
1760 		 qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0,
1761 		 qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0,
1762 		 qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0,
1763 		 qedr_qp_has_sq(qp) ? qp->urq.buf_len : 0);
1764 }
1765 
1766 static inline void
1767 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1768 			    struct qedr_qp *qp,
1769 			    struct qed_rdma_create_qp_out_params *out_params)
1770 {
1771 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1772 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1773 
1774 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1775 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1776 	if (!qp->srq) {
1777 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1778 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1779 	}
1780 
1781 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1782 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1783 }
1784 
1785 static void qedr_cleanup_user(struct qedr_dev *dev,
1786 			      struct qedr_ucontext *ctx,
1787 			      struct qedr_qp *qp)
1788 {
1789 	if (qedr_qp_has_sq(qp)) {
1790 		ib_umem_release(qp->usq.umem);
1791 		qp->usq.umem = NULL;
1792 	}
1793 
1794 	if (qedr_qp_has_rq(qp)) {
1795 		ib_umem_release(qp->urq.umem);
1796 		qp->urq.umem = NULL;
1797 	}
1798 
1799 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1800 		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1801 		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1802 	} else {
1803 		kfree(qp->usq.pbl_tbl);
1804 		kfree(qp->urq.pbl_tbl);
1805 	}
1806 
1807 	if (qp->usq.db_rec_data) {
1808 		qedr_db_recovery_del(dev, qp->usq.db_addr,
1809 				     &qp->usq.db_rec_data->db_data);
1810 		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1811 	}
1812 
1813 	if (qp->urq.db_rec_data) {
1814 		qedr_db_recovery_del(dev, qp->urq.db_addr,
1815 				     &qp->urq.db_rec_data->db_data);
1816 		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
1817 	}
1818 
1819 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1820 		qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
1821 				     &qp->urq.db_rec_db2_data);
1822 }
1823 
1824 static int qedr_create_user_qp(struct qedr_dev *dev,
1825 			       struct qedr_qp *qp,
1826 			       struct ib_pd *ibpd,
1827 			       struct ib_udata *udata,
1828 			       struct ib_qp_init_attr *attrs)
1829 {
1830 	struct qed_rdma_create_qp_in_params in_params;
1831 	struct qed_rdma_create_qp_out_params out_params;
1832 	struct qedr_create_qp_uresp uresp = {};
1833 	struct qedr_create_qp_ureq ureq = {};
1834 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1835 	struct qedr_ucontext *ctx = NULL;
1836 	struct qedr_pd *pd = NULL;
1837 	int rc = 0;
1838 
1839 	qp->create_type = QEDR_QP_CREATE_USER;
1840 
1841 	if (ibpd) {
1842 		pd = get_qedr_pd(ibpd);
1843 		ctx = pd->uctx;
1844 	}
1845 
1846 	if (udata) {
1847 		rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1848 					udata->inlen));
1849 		if (rc) {
1850 			DP_ERR(dev, "Problem copying data from user space\n");
1851 			return rc;
1852 		}
1853 	}
1854 
1855 	if (qedr_qp_has_sq(qp)) {
1856 		/* SQ - read access only (0) */
1857 		rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1858 					  ureq.sq_len, true, 0, alloc_and_init);
1859 		if (rc)
1860 			return rc;
1861 	}
1862 
1863 	if (qedr_qp_has_rq(qp)) {
1864 		/* RQ - read access only (0) */
1865 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1866 					  ureq.rq_len, true, 0, alloc_and_init);
1867 		if (rc)
1868 			return rc;
1869 	}
1870 
1871 	memset(&in_params, 0, sizeof(in_params));
1872 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1873 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1874 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1875 
1876 	if (qp->qp_type == IB_QPT_XRC_TGT) {
1877 		struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd);
1878 
1879 		in_params.xrcd_id = xrcd->xrcd_id;
1880 		in_params.qp_handle_lo = qp->qp_id;
1881 		in_params.use_srq = 1;
1882 	}
1883 
1884 	if (qedr_qp_has_sq(qp)) {
1885 		in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1886 		in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1887 	}
1888 
1889 	if (qedr_qp_has_rq(qp)) {
1890 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1891 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1892 	}
1893 
1894 	if (ctx)
1895 		SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
1896 
1897 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1898 					      &in_params, &out_params);
1899 
1900 	if (!qp->qed_qp) {
1901 		rc = -ENOMEM;
1902 		goto err1;
1903 	}
1904 
1905 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1906 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1907 
1908 	qp->qp_id = out_params.qp_id;
1909 	qp->icid = out_params.icid;
1910 
1911 	if (udata) {
1912 		rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1913 		if (rc)
1914 			goto err;
1915 	}
1916 
1917 	/* db offset was calculated in copy_qp_uresp, now set in the user q */
1918 	if (qedr_qp_has_sq(qp)) {
1919 		qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1920 		rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1921 					  &qp->usq.db_rec_data->db_data,
1922 					  DB_REC_WIDTH_32B,
1923 					  DB_REC_USER);
1924 		if (rc)
1925 			goto err;
1926 	}
1927 
1928 	if (qedr_qp_has_rq(qp)) {
1929 		qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1930 		rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1931 					  &qp->urq.db_rec_data->db_data,
1932 					  DB_REC_WIDTH_32B,
1933 					  DB_REC_USER);
1934 		if (rc)
1935 			goto err;
1936 	}
1937 
1938 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1939 		rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1940 					  &qp->urq.db_rec_db2_data,
1941 					  DB_REC_WIDTH_32B,
1942 					  DB_REC_USER);
1943 		if (rc)
1944 			goto err;
1945 	}
1946 	qedr_qp_user_print(dev, qp);
1947 	return rc;
1948 err:
1949 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1950 	if (rc)
1951 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1952 
1953 err1:
1954 	qedr_cleanup_user(dev, ctx, qp);
1955 	return rc;
1956 }
1957 
1958 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1959 {
1960 	int rc;
1961 
1962 	qp->sq.db = dev->db_addr +
1963 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1964 	qp->sq.db_data.data.icid = qp->icid;
1965 
1966 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1967 				  &qp->sq.db_data,
1968 				  DB_REC_WIDTH_32B,
1969 				  DB_REC_KERNEL);
1970 	if (rc)
1971 		return rc;
1972 
1973 	qp->rq.db = dev->db_addr +
1974 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1975 	qp->rq.db_data.data.icid = qp->icid;
1976 	qp->rq.iwarp_db2 = dev->db_addr +
1977 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1978 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1979 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1980 
1981 	rc = qedr_db_recovery_add(dev, qp->rq.db,
1982 				  &qp->rq.db_data,
1983 				  DB_REC_WIDTH_32B,
1984 				  DB_REC_KERNEL);
1985 	if (rc)
1986 		return rc;
1987 
1988 	rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
1989 				  &qp->rq.iwarp_db2_data,
1990 				  DB_REC_WIDTH_32B,
1991 				  DB_REC_KERNEL);
1992 	return rc;
1993 }
1994 
1995 static int
1996 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1997 			   struct qedr_qp *qp,
1998 			   struct qed_rdma_create_qp_in_params *in_params,
1999 			   u32 n_sq_elems, u32 n_rq_elems)
2000 {
2001 	struct qed_rdma_create_qp_out_params out_params;
2002 	struct qed_chain_init_params params = {
2003 		.mode		= QED_CHAIN_MODE_PBL,
2004 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
2005 	};
2006 	int rc;
2007 
2008 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
2009 	params.num_elems = n_sq_elems;
2010 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
2011 
2012 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
2013 	if (rc)
2014 		return rc;
2015 
2016 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
2017 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
2018 
2019 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
2020 	params.num_elems = n_rq_elems;
2021 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
2022 
2023 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2024 	if (rc)
2025 		return rc;
2026 
2027 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
2028 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
2029 
2030 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
2031 					      in_params, &out_params);
2032 
2033 	if (!qp->qed_qp)
2034 		return -EINVAL;
2035 
2036 	qp->qp_id = out_params.qp_id;
2037 	qp->icid = out_params.icid;
2038 
2039 	return qedr_set_roce_db_info(dev, qp);
2040 }
2041 
2042 static int
2043 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
2044 			    struct qedr_qp *qp,
2045 			    struct qed_rdma_create_qp_in_params *in_params,
2046 			    u32 n_sq_elems, u32 n_rq_elems)
2047 {
2048 	struct qed_rdma_create_qp_out_params out_params;
2049 	struct qed_chain_init_params params = {
2050 		.mode		= QED_CHAIN_MODE_PBL,
2051 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
2052 	};
2053 	int rc;
2054 
2055 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
2056 						     QEDR_SQE_ELEMENT_SIZE,
2057 						     QED_CHAIN_PAGE_SIZE,
2058 						     QED_CHAIN_MODE_PBL);
2059 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
2060 						     QEDR_RQE_ELEMENT_SIZE,
2061 						     QED_CHAIN_PAGE_SIZE,
2062 						     QED_CHAIN_MODE_PBL);
2063 
2064 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
2065 					      in_params, &out_params);
2066 
2067 	if (!qp->qed_qp)
2068 		return -EINVAL;
2069 
2070 	/* Now we allocate the chain */
2071 
2072 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
2073 	params.num_elems = n_sq_elems;
2074 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
2075 	params.ext_pbl_virt = out_params.sq_pbl_virt;
2076 	params.ext_pbl_phys = out_params.sq_pbl_phys;
2077 
2078 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
2079 	if (rc)
2080 		goto err;
2081 
2082 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
2083 	params.num_elems = n_rq_elems;
2084 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
2085 	params.ext_pbl_virt = out_params.rq_pbl_virt;
2086 	params.ext_pbl_phys = out_params.rq_pbl_phys;
2087 
2088 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
2089 	if (rc)
2090 		goto err;
2091 
2092 	qp->qp_id = out_params.qp_id;
2093 	qp->icid = out_params.icid;
2094 
2095 	return qedr_set_iwarp_db_info(dev, qp);
2096 
2097 err:
2098 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2099 
2100 	return rc;
2101 }
2102 
2103 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2104 {
2105 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2106 	kfree(qp->wqe_wr_id);
2107 
2108 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2109 	kfree(qp->rqe_wr_id);
2110 
2111 	/* GSI qp is not registered to db mechanism so no need to delete */
2112 	if (qp->qp_type == IB_QPT_GSI)
2113 		return;
2114 
2115 	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2116 
2117 	if (!qp->srq) {
2118 		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2119 
2120 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2121 			qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2122 					     &qp->rq.iwarp_db2_data);
2123 	}
2124 }
2125 
2126 static int qedr_create_kernel_qp(struct qedr_dev *dev,
2127 				 struct qedr_qp *qp,
2128 				 struct ib_pd *ibpd,
2129 				 struct ib_qp_init_attr *attrs)
2130 {
2131 	struct qed_rdma_create_qp_in_params in_params;
2132 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2133 	int rc = -EINVAL;
2134 	u32 n_rq_elems;
2135 	u32 n_sq_elems;
2136 	u32 n_sq_entries;
2137 
2138 	memset(&in_params, 0, sizeof(in_params));
2139 	qp->create_type = QEDR_QP_CREATE_KERNEL;
2140 
2141 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2142 	 * the ring. The ring should allow at least a single WR, even if the
2143 	 * user requested none, due to allocation issues.
2144 	 * We should add an extra WR since the prod and cons indices of
2145 	 * wqe_wr_id are managed in such a way that the WQ is considered full
2146 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
2147 	 * double the number of entries due an iSER issue that pushes far more
2148 	 * WRs than indicated. If we decline its ib_post_send() then we get
2149 	 * error prints in the dmesg we'd like to avoid.
2150 	 */
2151 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2152 			      dev->attr.max_sqe);
2153 
2154 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2155 				GFP_KERNEL);
2156 	if (!qp->wqe_wr_id) {
2157 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2158 		return -ENOMEM;
2159 	}
2160 
2161 	/* QP handle to be written in CQE */
2162 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2163 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2164 
2165 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2166 	 * the ring. There ring should allow at least a single WR, even if the
2167 	 * user requested none, due to allocation issues.
2168 	 */
2169 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2170 
2171 	/* Allocate driver internal RQ array */
2172 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2173 				GFP_KERNEL);
2174 	if (!qp->rqe_wr_id) {
2175 		DP_ERR(dev,
2176 		       "create qp: failed RQ shadow memory allocation\n");
2177 		kfree(qp->wqe_wr_id);
2178 		return -ENOMEM;
2179 	}
2180 
2181 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2182 
2183 	n_sq_entries = attrs->cap.max_send_wr;
2184 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2185 	n_sq_entries = max_t(u32, n_sq_entries, 1);
2186 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2187 
2188 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2189 
2190 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2191 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2192 						 n_sq_elems, n_rq_elems);
2193 	else
2194 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2195 						n_sq_elems, n_rq_elems);
2196 	if (rc)
2197 		qedr_cleanup_kernel(dev, qp);
2198 
2199 	return rc;
2200 }
2201 
2202 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2203 				  struct ib_udata *udata)
2204 {
2205 	struct qedr_ucontext *ctx =
2206 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2207 					  ibucontext);
2208 	int rc;
2209 
2210 	if (qp->qp_type != IB_QPT_GSI) {
2211 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2212 		if (rc)
2213 			return rc;
2214 	}
2215 
2216 	if (qp->create_type == QEDR_QP_CREATE_USER)
2217 		qedr_cleanup_user(dev, ctx, qp);
2218 	else
2219 		qedr_cleanup_kernel(dev, qp);
2220 
2221 	return 0;
2222 }
2223 
2224 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
2225 			     struct ib_qp_init_attr *attrs,
2226 			     struct ib_udata *udata)
2227 {
2228 	struct qedr_xrcd *xrcd = NULL;
2229 	struct qedr_pd *pd = NULL;
2230 	struct qedr_dev *dev;
2231 	struct qedr_qp *qp;
2232 	struct ib_qp *ibqp;
2233 	int rc = 0;
2234 
2235 	if (attrs->qp_type == IB_QPT_XRC_TGT) {
2236 		xrcd = get_qedr_xrcd(attrs->xrcd);
2237 		dev = get_qedr_dev(xrcd->ibxrcd.device);
2238 	} else {
2239 		pd = get_qedr_pd(ibpd);
2240 		dev = get_qedr_dev(ibpd->device);
2241 	}
2242 
2243 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2244 		 udata ? "user library" : "kernel", pd);
2245 
2246 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2247 	if (rc)
2248 		return ERR_PTR(rc);
2249 
2250 	DP_DEBUG(dev, QEDR_MSG_QP,
2251 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2252 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2253 		 get_qedr_cq(attrs->send_cq),
2254 		 get_qedr_cq(attrs->send_cq)->icid,
2255 		 get_qedr_cq(attrs->recv_cq),
2256 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2257 
2258 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2259 	if (!qp) {
2260 		DP_ERR(dev, "create qp: failed allocating memory\n");
2261 		return ERR_PTR(-ENOMEM);
2262 	}
2263 
2264 	qedr_set_common_qp_params(dev, qp, pd, attrs);
2265 
2266 	if (attrs->qp_type == IB_QPT_GSI) {
2267 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
2268 		if (IS_ERR(ibqp))
2269 			kfree(qp);
2270 		return ibqp;
2271 	}
2272 
2273 	if (udata || xrcd)
2274 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2275 	else
2276 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2277 
2278 	if (rc)
2279 		goto out_free_qp;
2280 
2281 	qp->ibqp.qp_num = qp->qp_id;
2282 
2283 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2284 		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2285 		if (rc)
2286 			goto out_free_qp_resources;
2287 	}
2288 
2289 	return &qp->ibqp;
2290 
2291 out_free_qp_resources:
2292 	qedr_free_qp_resources(dev, qp, udata);
2293 out_free_qp:
2294 	kfree(qp);
2295 
2296 	return ERR_PTR(-EFAULT);
2297 }
2298 
2299 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2300 {
2301 	switch (qp_state) {
2302 	case QED_ROCE_QP_STATE_RESET:
2303 		return IB_QPS_RESET;
2304 	case QED_ROCE_QP_STATE_INIT:
2305 		return IB_QPS_INIT;
2306 	case QED_ROCE_QP_STATE_RTR:
2307 		return IB_QPS_RTR;
2308 	case QED_ROCE_QP_STATE_RTS:
2309 		return IB_QPS_RTS;
2310 	case QED_ROCE_QP_STATE_SQD:
2311 		return IB_QPS_SQD;
2312 	case QED_ROCE_QP_STATE_ERR:
2313 		return IB_QPS_ERR;
2314 	case QED_ROCE_QP_STATE_SQE:
2315 		return IB_QPS_SQE;
2316 	}
2317 	return IB_QPS_ERR;
2318 }
2319 
2320 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2321 					enum ib_qp_state qp_state)
2322 {
2323 	switch (qp_state) {
2324 	case IB_QPS_RESET:
2325 		return QED_ROCE_QP_STATE_RESET;
2326 	case IB_QPS_INIT:
2327 		return QED_ROCE_QP_STATE_INIT;
2328 	case IB_QPS_RTR:
2329 		return QED_ROCE_QP_STATE_RTR;
2330 	case IB_QPS_RTS:
2331 		return QED_ROCE_QP_STATE_RTS;
2332 	case IB_QPS_SQD:
2333 		return QED_ROCE_QP_STATE_SQD;
2334 	case IB_QPS_ERR:
2335 		return QED_ROCE_QP_STATE_ERR;
2336 	default:
2337 		return QED_ROCE_QP_STATE_ERR;
2338 	}
2339 }
2340 
2341 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2342 {
2343 	qed_chain_reset(&qph->pbl);
2344 	qph->prod = 0;
2345 	qph->cons = 0;
2346 	qph->wqe_cons = 0;
2347 	qph->db_data.data.value = cpu_to_le16(0);
2348 }
2349 
2350 static int qedr_update_qp_state(struct qedr_dev *dev,
2351 				struct qedr_qp *qp,
2352 				enum qed_roce_qp_state cur_state,
2353 				enum qed_roce_qp_state new_state)
2354 {
2355 	int status = 0;
2356 
2357 	if (new_state == cur_state)
2358 		return 0;
2359 
2360 	switch (cur_state) {
2361 	case QED_ROCE_QP_STATE_RESET:
2362 		switch (new_state) {
2363 		case QED_ROCE_QP_STATE_INIT:
2364 			qp->prev_wqe_size = 0;
2365 			qedr_reset_qp_hwq_info(&qp->sq);
2366 			qedr_reset_qp_hwq_info(&qp->rq);
2367 			break;
2368 		default:
2369 			status = -EINVAL;
2370 			break;
2371 		}
2372 		break;
2373 	case QED_ROCE_QP_STATE_INIT:
2374 		switch (new_state) {
2375 		case QED_ROCE_QP_STATE_RTR:
2376 			/* Update doorbell (in case post_recv was
2377 			 * done before move to RTR)
2378 			 */
2379 
2380 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2381 				writel(qp->rq.db_data.raw, qp->rq.db);
2382 			}
2383 			break;
2384 		case QED_ROCE_QP_STATE_ERR:
2385 			break;
2386 		default:
2387 			/* Invalid state change. */
2388 			status = -EINVAL;
2389 			break;
2390 		}
2391 		break;
2392 	case QED_ROCE_QP_STATE_RTR:
2393 		/* RTR->XXX */
2394 		switch (new_state) {
2395 		case QED_ROCE_QP_STATE_RTS:
2396 			break;
2397 		case QED_ROCE_QP_STATE_ERR:
2398 			break;
2399 		default:
2400 			/* Invalid state change. */
2401 			status = -EINVAL;
2402 			break;
2403 		}
2404 		break;
2405 	case QED_ROCE_QP_STATE_RTS:
2406 		/* RTS->XXX */
2407 		switch (new_state) {
2408 		case QED_ROCE_QP_STATE_SQD:
2409 			break;
2410 		case QED_ROCE_QP_STATE_ERR:
2411 			break;
2412 		default:
2413 			/* Invalid state change. */
2414 			status = -EINVAL;
2415 			break;
2416 		}
2417 		break;
2418 	case QED_ROCE_QP_STATE_SQD:
2419 		/* SQD->XXX */
2420 		switch (new_state) {
2421 		case QED_ROCE_QP_STATE_RTS:
2422 		case QED_ROCE_QP_STATE_ERR:
2423 			break;
2424 		default:
2425 			/* Invalid state change. */
2426 			status = -EINVAL;
2427 			break;
2428 		}
2429 		break;
2430 	case QED_ROCE_QP_STATE_ERR:
2431 		/* ERR->XXX */
2432 		switch (new_state) {
2433 		case QED_ROCE_QP_STATE_RESET:
2434 			if ((qp->rq.prod != qp->rq.cons) ||
2435 			    (qp->sq.prod != qp->sq.cons)) {
2436 				DP_NOTICE(dev,
2437 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2438 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2439 					  qp->sq.cons);
2440 				status = -EINVAL;
2441 			}
2442 			break;
2443 		default:
2444 			status = -EINVAL;
2445 			break;
2446 		}
2447 		break;
2448 	default:
2449 		status = -EINVAL;
2450 		break;
2451 	}
2452 
2453 	return status;
2454 }
2455 
/* Verbs entry point for modifying a QP.
 *
 * @ibqp:      QP to modify
 * @attr:      new attribute values
 * @attr_mask: IB_QP_* bits selecting which fields of @attr are valid
 * @udata:     user data; NULL for kernel consumers
 *
 * The selected attributes are translated into a
 * qed_rdma_modify_qp_in_params request that is passed to qed (skipped for
 * the GSI QP). Some values (qkey, mtu, sgid index, PSNs, destination QPN and
 * the QP state) are also cached in the qedr_qp.
 *
 * Returns 0 on success or a negative errno.
 */
int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		   int attr_mask, struct ib_udata *udata)
{
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
	enum ib_qp_state old_qp_state, new_qp_state;
	enum qed_roce_qp_state cur_state;
	int rc = 0;

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
		 attr->qp_state);

	/* Without IB_QP_STATE the QP stays in its current state */
	old_qp_state = qedr_get_ibqp_state(qp->state);
	if (attr_mask & IB_QP_STATE)
		new_qp_state = attr->qp_state;
	else
		new_qp_state = old_qp_state;

	/* The generic transition/mask validation is applied to RoCE only */
	if (rdma_protocol_roce(&dev->ibdev, 1)) {
		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
					ibqp->qp_type, attr_mask)) {
			DP_ERR(dev,
			       "modify qp: invalid attribute mask=0x%x specified for\n"
			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
			       attr_mask, qp->qp_id, ibqp->qp_type,
			       old_qp_state, new_qp_state);
			rc = -EINVAL;
			goto err;
		}
	}

	/* Translate the masks... */
	if (attr_mask & IB_QP_STATE) {
		SET_FIELD(qp_params.modify_flags,
			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
	}

	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
		qp_params.sqd_async = true;

	/* Only the index is range checked; the pkey that gets programmed is
	 * always the default one.
	 */
	if (attr_mask & IB_QP_PKEY_INDEX) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
			rc = -EINVAL;
			goto err;
		}

		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
	}

	/* The qkey is only cached in the driver, not passed in qp_params */
	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_ACCESS_FLAGS) {
		SET_FIELD(qp_params.modify_flags,
			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
						  IB_ACCESS_REMOTE_READ;
		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
						   IB_ACCESS_REMOTE_WRITE;
		qp_params.incoming_atomic_en = attr->qp_access_flags &
					       IB_ACCESS_REMOTE_ATOMIC;
	}

	/* Address vector and path MTU are rejected for iWARP */
	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
		if (rdma_protocol_iwarp(&dev->ibdev, 1))
			return -EINVAL;

		if (attr_mask & IB_QP_PATH_MTU) {
			if (attr->path_mtu < IB_MTU_256 ||
			    attr->path_mtu > IB_MTU_4096) {
				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
				rc = -EINVAL;
				goto err;
			}
			/* Never exceed what the netdev MTU allows */
			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
				      ib_mtu_enum_to_int(iboe_get_mtu
							 (dev->ndev->mtu)));
		}

		/* No MTU cached yet: derive it from the netdev MTU */
		if (!qp->mtu) {
			qp->mtu =
			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
		}

		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);

		qp_params.traffic_class_tos = grh->traffic_class;
		qp_params.flow_label = grh->flow_label;
		qp_params.hop_limit_ttl = grh->hop_limit;

		qp->sgid_idx = grh->sgid_index;

		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
		if (rc) {
			DP_ERR(dev,
			       "modify qp: problems with GID index %d (rc=%d)\n",
			       grh->sgid_index, rc);
			return rc;
		}

		rc = qedr_get_dmac(dev, &attr->ah_attr,
				   qp_params.remote_mac_addr);
		if (rc)
			return rc;

		/* The local MAC is the netdev's own address */
		qp_params.use_local_mac = true;
		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);

		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
			 qp_params.remote_mac_addr);

		qp_params.mtu = qp->mtu;
		qp_params.lb_indication = false;
	}

	/* The request always carries an MTU, even if none was set above */
	if (!qp_params.mtu) {
		/* Stay with current MTU */
		if (qp->mtu)
			qp_params.mtu = qp->mtu;
		else
			qp_params.mtu =
			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
	}

	if (attr_mask & IB_QP_TIMEOUT) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);

		/* The received timeout value is an exponent used like this:
		 *    "12.7.34 LOCAL ACK TIMEOUT
		 *    Value representing the transport (ACK) timeout for use by
		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
		 * The FW expects timeout in msec so we need to divide the usec
		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
		 * The value of zero means infinite so we use a 'max_t' to make
		 * sure that sub 1 msec values will be configured as 1 msec.
		 */
		if (attr->timeout)
			qp_params.ack_timeout =
					1 << max_t(int, attr->timeout - 8, 0);
		else
			qp_params.ack_timeout = 0;
	}

	if (attr_mask & IB_QP_RETRY_CNT) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
		qp_params.retry_cnt = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
		qp_params.rnr_retry_cnt = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_RQ_PSN) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
		qp_params.rq_psn = attr->rq_psn;
		qp->rq_psn = attr->rq_psn;
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
			rc = -EINVAL;
			DP_ERR(dev,
			       "unsupported max_rd_atomic=%d, supported=%d\n",
			       attr->max_rd_atomic,
			       dev->attr.max_qp_req_rd_atomic_resc);
			goto err;
		}

		SET_FIELD(qp_params.modify_flags,
			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
	}

	if (attr_mask & IB_QP_SQ_PSN) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
		qp_params.sq_psn = attr->sq_psn;
		qp->sq_psn = attr->sq_psn;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
		if (attr->max_dest_rd_atomic >
		    dev->attr.max_qp_resp_rd_atomic_resc) {
			DP_ERR(dev,
			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
			       attr->max_dest_rd_atomic,
			       dev->attr.max_qp_resp_rd_atomic_resc);

			rc = -EINVAL;
			goto err;
		}

		SET_FIELD(qp_params.modify_flags,
			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
	}

	if (attr_mask & IB_QP_DEST_QPN) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);

		qp_params.dest_qp = attr->dest_qp_num;
		qp->dest_qp_num = attr->dest_qp_num;
	}

	/* Remember the state we are leaving; qp->state may change below */
	cur_state = qp->state;

	/* Update the QP state before the actual ramrod to prevent a race with
	 * fast path. Modifying the QP state to error will cause the device to
	 * flush the CQEs, and while polling, the flushed CQEs will be
	 * considered a potential issue if the QP isn't in error state.
	 */
	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
		qp->state = QED_ROCE_QP_STATE_ERR;

	/* The GSI QP is not modified through qed */
	if (qp->qp_type != IB_QPT_GSI)
		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
					      qp->qed_qp, &qp_params);

	/* NOTE(review): for kernel non-GSI QPs the assignment below replaces
	 * the result of rdma_modify_qp() above, and qp->state is updated
	 * whatever rc is - confirm this is intended.
	 */
	if (attr_mask & IB_QP_STATE) {
		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
			rc = qedr_update_qp_state(dev, qp, cur_state,
						  qp_params.new_state);
		qp->state = qp_params.new_state;
	}

err:
	return rc;
}
2712 
2713 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2714 {
2715 	int ib_qp_acc_flags = 0;
2716 
2717 	if (params->incoming_rdma_write_en)
2718 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2719 	if (params->incoming_rdma_read_en)
2720 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2721 	if (params->incoming_atomic_en)
2722 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2723 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2724 	return ib_qp_acc_flags;
2725 }
2726 
2727 int qedr_query_qp(struct ib_qp *ibqp,
2728 		  struct ib_qp_attr *qp_attr,
2729 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2730 {
2731 	struct qed_rdma_query_qp_out_params params;
2732 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2733 	struct qedr_dev *dev = qp->dev;
2734 	int rc = 0;
2735 
2736 	memset(&params, 0, sizeof(params));
2737 
2738 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2739 	if (rc)
2740 		goto err;
2741 
2742 	memset(qp_attr, 0, sizeof(*qp_attr));
2743 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2744 
2745 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2746 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2747 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2748 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2749 	qp_attr->rq_psn = params.rq_psn;
2750 	qp_attr->sq_psn = params.sq_psn;
2751 	qp_attr->dest_qp_num = params.dest_qp;
2752 
2753 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2754 
2755 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2756 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2757 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2758 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2759 	qp_attr->cap.max_inline_data = dev->attr.max_inline;
2760 	qp_init_attr->cap = qp_attr->cap;
2761 
2762 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2763 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2764 			params.flow_label, qp->sgid_idx,
2765 			params.hop_limit_ttl, params.traffic_class_tos);
2766 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2767 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2768 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2769 	qp_attr->timeout = params.timeout;
2770 	qp_attr->rnr_retry = params.rnr_retry;
2771 	qp_attr->retry_cnt = params.retry_cnt;
2772 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2773 	qp_attr->pkey_index = params.pkey_index;
2774 	qp_attr->port_num = 1;
2775 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2776 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2777 	qp_attr->alt_pkey_index = 0;
2778 	qp_attr->alt_port_num = 0;
2779 	qp_attr->alt_timeout = 0;
2780 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2781 
2782 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2783 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2784 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2785 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2786 
2787 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2788 		 qp_attr->cap.max_inline_data);
2789 
2790 err:
2791 	return rc;
2792 }
2793 
2794 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2795 {
2796 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2797 	struct qedr_dev *dev = qp->dev;
2798 	struct ib_qp_attr attr;
2799 	int attr_mask = 0;
2800 
2801 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2802 		 qp, qp->qp_type);
2803 
2804 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2805 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2806 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2807 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2808 
2809 			attr.qp_state = IB_QPS_ERR;
2810 			attr_mask |= IB_QP_STATE;
2811 
2812 			/* Change the QP state to ERROR */
2813 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2814 		}
2815 	} else {
2816 		/* If connection establishment started the WAIT_FOR_CONNECT
2817 		 * bit will be on and we need to Wait for the establishment
2818 		 * to complete before destroying the qp.
2819 		 */
2820 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2821 				     &qp->iwarp_cm_flags))
2822 			wait_for_completion(&qp->iwarp_cm_comp);
2823 
2824 		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2825 		 * bit will be on, and we need to wait for the disconnect to
2826 		 * complete before continuing. We can use the same completion,
2827 		 * iwarp_cm_comp, since this is the only place that waits for
2828 		 * this completion and it is sequential. In addition,
2829 		 * disconnect can't occur before the connection is fully
2830 		 * established, therefore if WAIT_FOR_DISCONNECT is on it
2831 		 * means WAIT_FOR_CONNECT is also on and the completion for
2832 		 * CONNECT already occurred.
2833 		 */
2834 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2835 				     &qp->iwarp_cm_flags))
2836 			wait_for_completion(&qp->iwarp_cm_comp);
2837 	}
2838 
2839 	if (qp->qp_type == IB_QPT_GSI)
2840 		qedr_destroy_gsi_qp(dev);
2841 
2842 	/* We need to remove the entry from the xarray before we release the
2843 	 * qp_id to avoid a race of the qp_id being reallocated and failing
2844 	 * on xa_insert
2845 	 */
2846 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2847 		xa_erase(&dev->qps, qp->qp_id);
2848 
2849 	qedr_free_qp_resources(dev, qp, udata);
2850 
2851 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2852 		qedr_iw_qp_rem_ref(&qp->ibqp);
2853 	else
2854 		kfree(qp);
2855 
2856 	return 0;
2857 }
2858 
2859 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2860 		   struct ib_udata *udata)
2861 {
2862 	struct qedr_ah *ah = get_qedr_ah(ibah);
2863 
2864 	rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
2865 
2866 	return 0;
2867 }
2868 
2869 int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2870 {
2871 	struct qedr_ah *ah = get_qedr_ah(ibah);
2872 
2873 	rdma_destroy_ah_attr(&ah->attr);
2874 	return 0;
2875 }
2876 
2877 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2878 {
2879 	struct qedr_pbl *pbl, *tmp;
2880 
2881 	if (info->pbl_table)
2882 		list_add_tail(&info->pbl_table->list_entry,
2883 			      &info->free_pbl_list);
2884 
2885 	if (!list_empty(&info->inuse_pbl_list))
2886 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2887 
2888 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2889 		list_del(&pbl->list_entry);
2890 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2891 	}
2892 }
2893 
2894 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2895 			size_t page_list_len, bool two_layered)
2896 {
2897 	struct qedr_pbl *tmp;
2898 	int rc;
2899 
2900 	INIT_LIST_HEAD(&info->free_pbl_list);
2901 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2902 
2903 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2904 				  page_list_len, two_layered);
2905 	if (rc)
2906 		goto done;
2907 
2908 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2909 	if (IS_ERR(info->pbl_table)) {
2910 		rc = PTR_ERR(info->pbl_table);
2911 		goto done;
2912 	}
2913 
2914 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2915 		 &info->pbl_table->pa);
2916 
2917 	/* in usual case we use 2 PBLs, so we add one to free
2918 	 * list and allocating another one
2919 	 */
2920 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2921 	if (IS_ERR(tmp)) {
2922 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2923 		goto done;
2924 	}
2925 
2926 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2927 
2928 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2929 
2930 done:
2931 	if (rc)
2932 		free_mr_info(dev, info);
2933 
2934 	return rc;
2935 }
2936 
2937 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2938 			       u64 usr_addr, int acc, struct ib_udata *udata)
2939 {
2940 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2941 	struct qedr_mr *mr;
2942 	struct qedr_pd *pd;
2943 	int rc = -ENOMEM;
2944 
2945 	pd = get_qedr_pd(ibpd);
2946 	DP_DEBUG(dev, QEDR_MSG_MR,
2947 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2948 		 pd->pd_id, start, len, usr_addr, acc);
2949 
2950 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2951 		return ERR_PTR(-EINVAL);
2952 
2953 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2954 	if (!mr)
2955 		return ERR_PTR(rc);
2956 
2957 	mr->type = QEDR_MR_USER;
2958 
2959 	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2960 	if (IS_ERR(mr->umem)) {
2961 		rc = -EFAULT;
2962 		goto err0;
2963 	}
2964 
2965 	rc = init_mr_info(dev, &mr->info,
2966 			  ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1);
2967 	if (rc)
2968 		goto err1;
2969 
2970 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2971 			   &mr->info.pbl_info, PAGE_SHIFT);
2972 
2973 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2974 	if (rc) {
2975 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2976 		goto err1;
2977 	}
2978 
2979 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2980 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2981 	mr->hw_mr.key = 0;
2982 	mr->hw_mr.pd = pd->pd_id;
2983 	mr->hw_mr.local_read = 1;
2984 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2985 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2986 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2987 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2988 	mr->hw_mr.mw_bind = false;
2989 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2990 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2991 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2992 	mr->hw_mr.page_size_log = PAGE_SHIFT;
2993 	mr->hw_mr.length = len;
2994 	mr->hw_mr.vaddr = usr_addr;
2995 	mr->hw_mr.phy_mr = false;
2996 	mr->hw_mr.dma_mr = false;
2997 
2998 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2999 	if (rc) {
3000 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3001 		goto err2;
3002 	}
3003 
3004 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3005 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3006 	    mr->hw_mr.remote_atomic)
3007 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3008 
3009 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
3010 		 mr->ibmr.lkey);
3011 	return &mr->ibmr;
3012 
3013 err2:
3014 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3015 err1:
3016 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
3017 err0:
3018 	kfree(mr);
3019 	return ERR_PTR(rc);
3020 }
3021 
3022 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
3023 {
3024 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
3025 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
3026 	int rc = 0;
3027 
3028 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
3029 	if (rc)
3030 		return rc;
3031 
3032 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3033 
3034 	if (mr->type != QEDR_MR_DMA)
3035 		free_mr_info(dev, &mr->info);
3036 
3037 	/* it could be user registered memory. */
3038 	ib_umem_release(mr->umem);
3039 
3040 	kfree(mr);
3041 
3042 	return rc;
3043 }
3044 
3045 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
3046 				       int max_page_list_len)
3047 {
3048 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3049 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3050 	struct qedr_mr *mr;
3051 	int rc = -ENOMEM;
3052 
3053 	DP_DEBUG(dev, QEDR_MSG_MR,
3054 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
3055 		 max_page_list_len);
3056 
3057 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3058 	if (!mr)
3059 		return ERR_PTR(rc);
3060 
3061 	mr->dev = dev;
3062 	mr->type = QEDR_MR_FRMR;
3063 
3064 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
3065 	if (rc)
3066 		goto err0;
3067 
3068 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3069 	if (rc) {
3070 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
3071 		goto err0;
3072 	}
3073 
3074 	/* Index only, 18 bit long, lkey = itid << 8 | key */
3075 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
3076 	mr->hw_mr.key = 0;
3077 	mr->hw_mr.pd = pd->pd_id;
3078 	mr->hw_mr.local_read = 1;
3079 	mr->hw_mr.local_write = 0;
3080 	mr->hw_mr.remote_read = 0;
3081 	mr->hw_mr.remote_write = 0;
3082 	mr->hw_mr.remote_atomic = 0;
3083 	mr->hw_mr.mw_bind = false;
3084 	mr->hw_mr.pbl_ptr = 0;
3085 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
3086 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
3087 	mr->hw_mr.length = 0;
3088 	mr->hw_mr.vaddr = 0;
3089 	mr->hw_mr.phy_mr = true;
3090 	mr->hw_mr.dma_mr = false;
3091 
3092 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3093 	if (rc) {
3094 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3095 		goto err1;
3096 	}
3097 
3098 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3099 	mr->ibmr.rkey = mr->ibmr.lkey;
3100 
3101 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
3102 	return mr;
3103 
3104 err1:
3105 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3106 err0:
3107 	kfree(mr);
3108 	return ERR_PTR(rc);
3109 }
3110 
3111 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3112 			    u32 max_num_sg)
3113 {
3114 	struct qedr_mr *mr;
3115 
3116 	if (mr_type != IB_MR_TYPE_MEM_REG)
3117 		return ERR_PTR(-EINVAL);
3118 
3119 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
3120 
3121 	if (IS_ERR(mr))
3122 		return ERR_PTR(-EINVAL);
3123 
3124 	return &mr->ibmr;
3125 }
3126 
3127 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3128 {
3129 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3130 	struct qedr_pbl *pbl_table;
3131 	struct regpair *pbe;
3132 	u32 pbes_in_page;
3133 
3134 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3135 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
3136 		return -ENOMEM;
3137 	}
3138 
3139 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3140 		 mr->npages, addr);
3141 
3142 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3143 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3144 	pbe = (struct regpair *)pbl_table->va;
3145 	pbe +=  mr->npages % pbes_in_page;
3146 	pbe->lo = cpu_to_le32((u32)addr);
3147 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3148 
3149 	mr->npages++;
3150 
3151 	return 0;
3152 }
3153 
3154 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3155 {
3156 	int work = info->completed - info->completed_handled - 1;
3157 
3158 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3159 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3160 		struct qedr_pbl *pbl;
3161 
3162 		/* Free all the page list that are possible to be freed
3163 		 * (all the ones that were invalidated), under the assumption
3164 		 * that if an FMR was completed successfully that means that
3165 		 * if there was an invalidate operation before it also ended
3166 		 */
3167 		pbl = list_first_entry(&info->inuse_pbl_list,
3168 				       struct qedr_pbl, list_entry);
3169 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3170 		info->completed_handled++;
3171 	}
3172 }
3173 
3174 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3175 		   int sg_nents, unsigned int *sg_offset)
3176 {
3177 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3178 
3179 	mr->npages = 0;
3180 
3181 	handle_completed_mrs(mr->dev, &mr->info);
3182 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3183 }
3184 
3185 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3186 {
3187 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3188 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3189 	struct qedr_mr *mr;
3190 	int rc;
3191 
3192 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3193 	if (!mr)
3194 		return ERR_PTR(-ENOMEM);
3195 
3196 	mr->type = QEDR_MR_DMA;
3197 
3198 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3199 	if (rc) {
3200 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
3201 		goto err1;
3202 	}
3203 
3204 	/* index only, 18 bit long, lkey = itid << 8 | key */
3205 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3206 	mr->hw_mr.pd = pd->pd_id;
3207 	mr->hw_mr.local_read = 1;
3208 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3209 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3210 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3211 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3212 	mr->hw_mr.dma_mr = true;
3213 
3214 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3215 	if (rc) {
3216 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3217 		goto err2;
3218 	}
3219 
3220 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3221 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3222 	    mr->hw_mr.remote_atomic)
3223 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3224 
3225 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3226 	return &mr->ibmr;
3227 
3228 err2:
3229 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3230 err1:
3231 	kfree(mr);
3232 	return ERR_PTR(rc);
3233 }
3234 
3235 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3236 {
3237 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3238 }
3239 
3240 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3241 {
3242 	int i, len = 0;
3243 
3244 	for (i = 0; i < num_sge; i++)
3245 		len += sg_list[i].length;
3246 
3247 	return len;
3248 }
3249 
3250 static void swap_wqe_data64(u64 *p)
3251 {
3252 	int i;
3253 
3254 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3255 		*p = cpu_to_be64(cpu_to_le64(*p));
3256 }
3257 
3258 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3259 				       struct qedr_qp *qp, u8 *wqe_size,
3260 				       const struct ib_send_wr *wr,
3261 				       const struct ib_send_wr **bad_wr,
3262 				       u8 *bits, u8 bit)
3263 {
3264 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3265 	char *seg_prt, *wqe;
3266 	int i, seg_siz;
3267 
3268 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3269 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3270 		*bad_wr = wr;
3271 		return 0;
3272 	}
3273 
3274 	if (!data_size)
3275 		return data_size;
3276 
3277 	*bits |= bit;
3278 
3279 	seg_prt = NULL;
3280 	wqe = NULL;
3281 	seg_siz = 0;
3282 
3283 	/* Copy data inline */
3284 	for (i = 0; i < wr->num_sge; i++) {
3285 		u32 len = wr->sg_list[i].length;
3286 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3287 
3288 		while (len > 0) {
3289 			u32 cur;
3290 
3291 			/* New segment required */
3292 			if (!seg_siz) {
3293 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3294 				seg_prt = wqe;
3295 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3296 				(*wqe_size)++;
3297 			}
3298 
3299 			/* Calculate currently allowed length */
3300 			cur = min_t(u32, len, seg_siz);
3301 			memcpy(seg_prt, src, cur);
3302 
3303 			/* Update segment variables */
3304 			seg_prt += cur;
3305 			seg_siz -= cur;
3306 
3307 			/* Update sge variables */
3308 			src += cur;
3309 			len -= cur;
3310 
3311 			/* Swap fully-completed segments */
3312 			if (!seg_siz)
3313 				swap_wqe_data64((u64 *)wqe);
3314 		}
3315 	}
3316 
3317 	/* swap last not completed segment */
3318 	if (seg_siz)
3319 		swap_wqe_data64((u64 *)wqe);
3320 
3321 	return data_size;
3322 }
3323 
/* Fill a receive-queue SGE: address as a little-endian register pair,
 * length and flags as little-endian 32-bit values. @sge is a pointer; it is
 * parenthesized at every use so that any pointer expression is accepted.
 */
#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->flags = cpu_to_le32(vflags);		\
	} while (0)

/* Fill an SRQ WQE header with the work request id (little-endian register
 * pair) and the number of SGEs that follow. @hdr is a pointer.
 */
#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
	do {							\
		DMA_REGPAIR_LE((hdr)->wr_id, vwr_id);		\
		(hdr)->num_sges = num_sge;			\
	} while (0)

/* Fill an SRQ SGE: address, length and lkey, all little-endian.
 * @sge is a pointer.
 */
#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->l_key = cpu_to_le32(vlkey);		\
	} while (0)
3343 
3344 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3345 				const struct ib_send_wr *wr)
3346 {
3347 	u32 data_size = 0;
3348 	int i;
3349 
3350 	for (i = 0; i < wr->num_sge; i++) {
3351 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3352 
3353 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3354 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3355 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3356 		data_size += wr->sg_list[i].length;
3357 	}
3358 
3359 	if (wqe_size)
3360 		*wqe_size += wr->num_sge;
3361 
3362 	return data_size;
3363 }
3364 
3365 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3366 				     struct qedr_qp *qp,
3367 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3368 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3369 				     const struct ib_send_wr *wr,
3370 				     const struct ib_send_wr **bad_wr)
3371 {
3372 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3373 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3374 
3375 	if (wr->send_flags & IB_SEND_INLINE &&
3376 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3377 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3378 		u8 flags = 0;
3379 
3380 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3381 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3382 						   bad_wr, &rwqe->flags, flags);
3383 	}
3384 
3385 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3386 }
3387 
3388 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3389 				     struct qedr_qp *qp,
3390 				     struct rdma_sq_send_wqe_1st *swqe,
3391 				     struct rdma_sq_send_wqe_2st *swqe2,
3392 				     const struct ib_send_wr *wr,
3393 				     const struct ib_send_wr **bad_wr)
3394 {
3395 	memset(swqe2, 0, sizeof(*swqe2));
3396 	if (wr->send_flags & IB_SEND_INLINE) {
3397 		u8 flags = 0;
3398 
3399 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3400 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3401 						   bad_wr, &swqe->flags, flags);
3402 	}
3403 
3404 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3405 }
3406 
3407 static int qedr_prepare_reg(struct qedr_qp *qp,
3408 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3409 			    const struct ib_reg_wr *wr)
3410 {
3411 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3412 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3413 
3414 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3415 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3416 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3417 	fwqe1->l_key = wr->key;
3418 
3419 	fwqe2->access_ctrl = 0;
3420 
3421 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3422 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3423 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3424 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3425 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3426 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3427 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3428 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3429 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3430 	fwqe2->fmr_ctrl = 0;
3431 
3432 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3433 		   ilog2(mr->ibmr.page_size) - 12);
3434 
3435 	fwqe2->length_hi = 0;
3436 	fwqe2->length_lo = mr->ibmr.length;
3437 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3438 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3439 
3440 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3441 
3442 	return 0;
3443 }
3444 
3445 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3446 {
3447 	switch (opcode) {
3448 	case IB_WR_RDMA_WRITE:
3449 	case IB_WR_RDMA_WRITE_WITH_IMM:
3450 		return IB_WC_RDMA_WRITE;
3451 	case IB_WR_SEND_WITH_IMM:
3452 	case IB_WR_SEND:
3453 	case IB_WR_SEND_WITH_INV:
3454 		return IB_WC_SEND;
3455 	case IB_WR_RDMA_READ:
3456 	case IB_WR_RDMA_READ_WITH_INV:
3457 		return IB_WC_RDMA_READ;
3458 	case IB_WR_ATOMIC_CMP_AND_SWP:
3459 		return IB_WC_COMP_SWAP;
3460 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3461 		return IB_WC_FETCH_ADD;
3462 	case IB_WR_REG_MR:
3463 		return IB_WC_REG_MR;
3464 	case IB_WR_LOCAL_INV:
3465 		return IB_WC_LOCAL_INV;
3466 	default:
3467 		return IB_WC_SEND;
3468 	}
3469 }
3470 
3471 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3472 				      const struct ib_send_wr *wr)
3473 {
3474 	int wq_is_full, err_wr, pbl_is_full;
3475 	struct qedr_dev *dev = qp->dev;
3476 
3477 	/* prevent SQ overflow and/or processing of a bad WR */
3478 	err_wr = wr->num_sge > qp->sq.max_sges;
3479 	wq_is_full = qedr_wq_is_full(&qp->sq);
3480 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3481 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3482 	if (wq_is_full || err_wr || pbl_is_full) {
3483 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3484 			DP_ERR(dev,
3485 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3486 			       qp);
3487 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3488 		}
3489 
3490 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3491 			DP_ERR(dev,
3492 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3493 			       qp);
3494 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3495 		}
3496 
3497 		if (pbl_is_full &&
3498 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3499 			DP_ERR(dev,
3500 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3501 			       qp);
3502 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3503 		}
3504 		return false;
3505 	}
3506 	return true;
3507 }
3508 
/* Build the send-queue WQE for a single work request.
 *
 * @ibqp:   queue pair the WR is posted on
 * @wr:     the work request to translate (only this one; wr->next is not
 *          followed here)
 * @bad_wr: set to @wr when the request cannot be posted
 *
 * The first chain element carries the common header (flags, request type,
 * previous WQE size); further elements are produced per opcode. Per-WQE
 * bookkeeping (signaled, opcode, wqe_size, bytes_len, mr) is stored in
 * qp->wqe_wr_id[] at the current SQ producer index. This function does not
 * advance the software producer or ring the doorbell.
 *
 * If *@bad_wr ends up set, the chain producer and qp->prev_wqe_size are
 * rolled back to their values from before this WR and -EINVAL is returned.
 *
 * Returns 0 on success, -ENOMEM when qedr_can_post_send() refuses the WR,
 * -EINVAL otherwise on failure.
 */
static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			    const struct ib_send_wr **bad_wr)
{
	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct rdma_sq_atomic_wqe_1st *awqe1;
	struct rdma_sq_atomic_wqe_2nd *awqe2;
	struct rdma_sq_atomic_wqe_3rd *awqe3;
	struct rdma_sq_send_wqe_2st *swqe2;
	struct rdma_sq_local_inv_wqe *iwqe;
	struct rdma_sq_rdma_wqe_2nd *rwqe2;
	struct rdma_sq_send_wqe_1st *swqe;
	struct rdma_sq_rdma_wqe_1st *rwqe;
	struct rdma_sq_fmr_wqe_1st *fwqe1;
	struct rdma_sq_common_wqe *wqe;
	u32 length;
	int rc = 0;
	bool comp;

	if (!qedr_can_post_send(qp, wr)) {
		*bad_wr = wr;
		return -ENOMEM;
	}

	/* First element: common header shared by all request types */
	wqe = qed_chain_produce(&qp->sq.pbl);
	qp->wqe_wr_id[qp->sq.prod].signaled =
		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;

	wqe->flags = 0;
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
		   !!(wr->send_flags & IB_SEND_SOLICITED));
	/* A completion is requested if the WR asks for one or the QP
	 * signals every WR (qp->signaled).
	 */
	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
		   !!(wr->send_flags & IB_SEND_FENCE));
	/* Also used below to restore qp->prev_wqe_size on failure */
	wqe->prev_wqe_size = qp->prev_wqe_size;

	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);

	switch (wr->opcode) {
	case IB_WR_SEND_WITH_IMM:
		/* Rejected when the device runs iWARP */
		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
			rc = -EINVAL;
			*bad_wr = wr;
			break;
		}
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
		/* header + second element; payload helpers add to this */
		swqe->wqe_size = 2;
		swqe2 = qed_chain_produce(&qp->sq.pbl);

		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		/* NOTE(review): stores the little-endian WQE field, not the
		 * CPU-order 'length'; identical only on little-endian hosts.
		 */
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;
	case IB_WR_SEND:
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;

		swqe->wqe_size = 2;
		swqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;
	case IB_WR_SEND_WITH_INV:
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
		swqe2 = qed_chain_produce(&qp->sq.pbl);
		swqe->wqe_size = 2;
		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		/* Rejected when the device runs iWARP */
		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
			rc = -EINVAL;
			*bad_wr = wr;
			break;
		}
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		/* NOTE(review): spelled differently from the SEND_WITH_IMM
		 * case above (htonl(cpu_to_le32()) vs
		 * cpu_to_le32(be32_to_cpu())).
		 */
		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;
	case IB_WR_RDMA_WRITE:
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;
	case IB_WR_RDMA_READ_WITH_INV:
		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
		fallthrough;	/* same is identical to RDMA READ */

	case IB_WR_RDMA_READ:
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
		/* Fixed size: the SGE helper below is called with a NULL
		 * wqe_size and so does not adjust it.
		 */
		awqe1->wqe_size = 4;

		awqe2 = qed_chain_produce(&qp->sq.pbl);
		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);

		awqe3 = qed_chain_produce(&qp->sq.pbl);

		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
			/* the addend travels in the swap_data field */
			DMA_REGPAIR_LE(awqe3->swap_data,
				       atomic_wr(wr)->compare_add);
		} else {
			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
			DMA_REGPAIR_LE(awqe3->swap_data,
				       atomic_wr(wr)->swap);
			DMA_REGPAIR_LE(awqe3->cmp_data,
				       atomic_wr(wr)->compare_add);
		}

		qedr_prepare_sq_sges(qp, NULL, wr);

		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
		qp->prev_wqe_size = awqe1->wqe_size;
		break;

	case IB_WR_LOCAL_INV:
		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
		/* single-element WQE */
		iwqe->wqe_size = 1;

		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
		/* NOTE(review): stored without cpu_to_le32(), unlike the
		 * invalidate key in the SEND_WITH_INV case.
		 */
		iwqe->inv_l_key = wr->ex.invalidate_rkey;
		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
		qp->prev_wqe_size = iwqe->wqe_size;
		break;
	case IB_WR_REG_MR:
		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
		fwqe1->wqe_size = 2;

		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
		if (rc) {
			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
			*bad_wr = wr;
			break;
		}

		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
		qp->prev_wqe_size = fwqe1->wqe_size;
		break;
	default:
		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
		rc = -EINVAL;
		*bad_wr = wr;
		break;
	}

	/* Any failure above (including one reported by a payload helper
	 * through bad_wr) is undone here.
	 */
	if (*bad_wr) {
		u16 value;

		/* Restore prod to its position before
		 * this WR was processed
		 */
		value = le16_to_cpu(qp->sq.db_data.data.value);
		qed_chain_set_prod(&qp->sq.pbl, value, wqe);

		/* Restore prev_wqe_size */
		qp->prev_wqe_size = wqe->prev_wqe_size;
		rc = -EINVAL;
		DP_ERR(dev, "POST SEND FAILED\n");
	}

	return rc;
}
3723 
/* Verbs entry point: post a chain of send work requests on @ibqp.
 *
 * @ibqp:   queue pair to post on
 * @wr:     head of the work-request list (linked through wr->next)
 * @bad_wr: cleared on entry; points at the first WR that could not be
 *          posted when an error is returned
 *
 * GSI QPs are handed to qedr_gsi_post_send(). For other QPs the WRs are
 * translated one by one under qp->q_lock; processing stops at the first
 * failure. The doorbell is written once, after the loop, whether or not
 * every WR was accepted.
 *
 * On RoCE devices, posting is only allowed while the QP is in RTS, ERR or
 * SQD state; otherwise -EINVAL is returned with *@bad_wr set to @wr.
 *
 * Returns 0 on success or the error from the first failing WR.
 */
int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
		   const struct ib_send_wr **bad_wr)
{
	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	unsigned long flags;
	int rc = 0;

	*bad_wr = NULL;

	if (qp->qp_type == IB_QPT_GSI)
		return qedr_gsi_post_send(ibqp, wr, bad_wr);

	spin_lock_irqsave(&qp->q_lock, flags);

	if (rdma_protocol_roce(&dev->ibdev, 1)) {
		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
			spin_unlock_irqrestore(&qp->q_lock, flags);
			*bad_wr = wr;
			DP_DEBUG(dev, QEDR_MSG_CQ,
				 "QP in wrong state! QP icid=0x%x state %d\n",
				 qp->icid, qp->state);
			return -EINVAL;
		}
	}

	while (wr) {
		rc = __qedr_post_send(ibqp, wr, bad_wr);
		if (rc)
			break;

		/* Record the caller's id at the slot the WQE was built for,
		 * then advance the software producer and the doorbell value.
		 */
		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;

		qedr_inc_sw_prod(&qp->sq);

		qp->sq.db_data.data.value++;

		wr = wr->next;
	}

	/* Trigger doorbell
	 * If there was a failure in the first WR then it will be triggered in
	 * vain. However this is not harmful (as long as the producer value is
	 * unchanged). For performance reasons we avoid checking for this
	 * redundant doorbell.
	 *
	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
	 * soon as we give the doorbell, we could get a completion
	 * for this wr, therefore we need to make sure that the
	 * memory is updated before giving the doorbell.
	 * During qedr_poll_cq, rmb is called before accessing the
	 * cqe. This covers for the smp_rmb as well.
	 */
	smp_wmb();
	writel(qp->sq.db_data.raw, qp->sq.db);

	spin_unlock_irqrestore(&qp->q_lock, flags);

	return rc;
}
3786 
3787 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3788 {
3789 	u32 used;
3790 
3791 	/* Calculate number of elements used based on producer
3792 	 * count and consumer count and subtract it from max
3793 	 * work request supported so that we get elements left.
3794 	 */
3795 	used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
3796 
3797 	return hw_srq->max_wr - used;
3798 }
3799 
/* Verbs entry point: post a chain of receive work requests on a shared
 * receive queue.
 *
 * @ibsrq:  the SRQ to post on
 * @wr:     head of the work-request list (linked through wr->next)
 * @bad_wr: set to the first WR that could not be posted; not written on
 *          success
 *
 * For every WR a header element followed by one element per SGE is produced
 * on the SRQ chain, and the SGE/WQE producer pair is then published through
 * virt_prod_pair_addr. Everything runs under srq->lock.
 *
 * Returns 0 on success, -ENOMEM when the SRQ has no room left or a WR has
 * more SGEs than hw_srq.max_sges. WRs before the failing one stay posted.
 */
int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
		       const struct ib_recv_wr **bad_wr)
{
	struct qedr_srq *srq = get_qedr_srq(ibsrq);
	struct qedr_srq_hwq_info *hw_srq;
	struct qedr_dev *dev = srq->dev;
	struct qed_chain *pbl;
	unsigned long flags;
	int status = 0;
	u32 num_sge;

	spin_lock_irqsave(&srq->lock, flags);

	hw_srq = &srq->hw_srq;
	pbl = &srq->hw_srq.pbl;
	while (wr) {
		struct rdma_srq_wqe_header *hdr;
		int i;

		if (!qedr_srq_elem_left(hw_srq) ||
		    wr->num_sge > srq->hw_srq.max_sges) {
			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
			       hw_srq->wr_prod_cnt,
			       atomic_read(&hw_srq->wr_cons_cnt),
			       wr->num_sge, srq->hw_srq.max_sges);
			status = -ENOMEM;
			*bad_wr = wr;
			break;
		}

		hdr = qed_chain_produce(pbl);
		num_sge = wr->num_sge;
		/* Set number of sge and work request id in header */
		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);

		/* The header element is counted in sge_prod as well */
		srq->hw_srq.wr_prod_cnt++;
		hw_srq->wqe_prod++;
		hw_srq->sge_prod++;

		DP_DEBUG(dev, QEDR_MSG_SRQ,
			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);

		for (i = 0; i < wr->num_sge; i++) {
			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);

			/* Set SGE length, lkey and address */
			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
				    wr->sg_list[i].length, wr->sg_list[i].lkey);

			DP_DEBUG(dev, QEDR_MSG_SRQ,
				 "[%d]: len %d key %x addr %x:%x\n",
				 i, srq_sge->length, srq_sge->l_key,
				 srq_sge->addr.hi, srq_sge->addr.lo);
			hw_srq->sge_prod++;
		}

		/* Update WQE and SGE information before
		 * updating producer.
		 */
		dma_wmb();

		/* SRQ producer is 8 bytes. Need to update SGE producer index
		 * in first 4 bytes and need to update WQE producer in
		 * next 4 bytes.
		 */
		srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod);
		/* Make sure sge producer is updated first */
		dma_wmb();
		srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod);

		wr = wr->next;
	}

	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
		 qed_chain_get_elem_left(pbl));
	spin_unlock_irqrestore(&srq->lock, flags);

	return status;
}
3880 
3881 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3882 		   const struct ib_recv_wr **bad_wr)
3883 {
3884 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3885 	struct qedr_dev *dev = qp->dev;
3886 	unsigned long flags;
3887 	int status = 0;
3888 
3889 	if (qp->qp_type == IB_QPT_GSI)
3890 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3891 
3892 	spin_lock_irqsave(&qp->q_lock, flags);
3893 
3894 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3895 		spin_unlock_irqrestore(&qp->q_lock, flags);
3896 		*bad_wr = wr;
3897 		return -EINVAL;
3898 	}
3899 
3900 	while (wr) {
3901 		int i;
3902 
3903 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3904 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3905 		    wr->num_sge > qp->rq.max_sges) {
3906 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3907 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3908 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3909 			       qp->rq.max_sges);
3910 			status = -ENOMEM;
3911 			*bad_wr = wr;
3912 			break;
3913 		}
3914 		for (i = 0; i < wr->num_sge; i++) {
3915 			u32 flags = 0;
3916 			struct rdma_rq_sge *rqe =
3917 			    qed_chain_produce(&qp->rq.pbl);
3918 
3919 			/* First one must include the number
3920 			 * of SGE in the list
3921 			 */
3922 			if (!i)
3923 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3924 					  wr->num_sge);
3925 
3926 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3927 				  wr->sg_list[i].lkey);
3928 
3929 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3930 				   wr->sg_list[i].length, flags);
3931 		}
3932 
3933 		/* Special case of no sges. FW requires between 1-4 sges...
3934 		 * in this case we need to post 1 sge with length zero. this is
3935 		 * because rdma write with immediate consumes an RQ.
3936 		 */
3937 		if (!wr->num_sge) {
3938 			u32 flags = 0;
3939 			struct rdma_rq_sge *rqe =
3940 			    qed_chain_produce(&qp->rq.pbl);
3941 
3942 			/* First one must include the number
3943 			 * of SGE in the list
3944 			 */
3945 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3946 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3947 
3948 			RQ_SGE_SET(rqe, 0, 0, flags);
3949 			i = 1;
3950 		}
3951 
3952 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3953 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3954 
3955 		qedr_inc_sw_prod(&qp->rq);
3956 
3957 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3958 		 * soon as we give the doorbell, we could get a completion
3959 		 * for this wr, therefore we need to make sure that the
3960 		 * memory is update before giving the doorbell.
3961 		 * During qedr_poll_cq, rmb is called before accessing the
3962 		 * cqe. This covers for the smp_rmb as well.
3963 		 */
3964 		smp_wmb();
3965 
3966 		qp->rq.db_data.data.value++;
3967 
3968 		writel(qp->rq.db_data.raw, qp->rq.db);
3969 
3970 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3971 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3972 		}
3973 
3974 		wr = wr->next;
3975 	}
3976 
3977 	spin_unlock_irqrestore(&qp->q_lock, flags);
3978 
3979 	return status;
3980 }
3981 
3982 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3983 {
3984 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3985 
3986 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3987 		cq->pbl_toggle;
3988 }
3989 
3990 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3991 {
3992 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3993 	struct qedr_qp *qp;
3994 
3995 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3996 						   resp_cqe->qp_handle.lo,
3997 						   u64);
3998 	return qp;
3999 }
4000 
4001 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
4002 {
4003 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
4004 
4005 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
4006 }
4007 
4008 /* Return latest CQE (needs processing) */
4009 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
4010 {
4011 	return cq->latest_cqe;
4012 }
4013 
4014 /* In fmr we need to increase the number of fmr completed counter for the fmr
4015  * algorithm determining whether we can free a pbl or not.
4016  * we need to perform this whether the work request was signaled or not. for
4017  * this purpose we call this function from the condition that checks if a wr
4018  * should be skipped, to make sure we don't miss it ( possibly this fmr
4019  * operation was not signalted)
4020  */
4021 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
4022 {
4023 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
4024 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
4025 }
4026 
4027 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
4028 		       struct qedr_cq *cq, int num_entries,
4029 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
4030 		       int force)
4031 {
4032 	u16 cnt = 0;
4033 
4034 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
4035 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
4036 			qedr_chk_if_fmr(qp);
4037 			/* skip WC */
4038 			goto next_cqe;
4039 		}
4040 
4041 		/* fill WC */
4042 		wc->status = status;
4043 		wc->vendor_err = 0;
4044 		wc->wc_flags = 0;
4045 		wc->src_qp = qp->id;
4046 		wc->qp = &qp->ibqp;
4047 
4048 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
4049 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
4050 
4051 		switch (wc->opcode) {
4052 		case IB_WC_RDMA_WRITE:
4053 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
4054 			break;
4055 		case IB_WC_COMP_SWAP:
4056 		case IB_WC_FETCH_ADD:
4057 			wc->byte_len = 8;
4058 			break;
4059 		case IB_WC_REG_MR:
4060 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
4061 			break;
4062 		case IB_WC_RDMA_READ:
4063 		case IB_WC_SEND:
4064 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
4065 			break;
4066 		default:
4067 			break;
4068 		}
4069 
4070 		num_entries--;
4071 		wc++;
4072 		cnt++;
4073 next_cqe:
4074 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
4075 			qed_chain_consume(&qp->sq.pbl);
4076 		qedr_inc_sw_cons(&qp->sq);
4077 	}
4078 
4079 	return cnt;
4080 }
4081 
4082 static int qedr_poll_cq_req(struct qedr_dev *dev,
4083 			    struct qedr_qp *qp, struct qedr_cq *cq,
4084 			    int num_entries, struct ib_wc *wc,
4085 			    struct rdma_cqe_requester *req)
4086 {
4087 	int cnt = 0;
4088 
4089 	switch (req->status) {
4090 	case RDMA_CQE_REQ_STS_OK:
4091 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4092 				  IB_WC_SUCCESS, 0);
4093 		break;
4094 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
4095 		if (qp->state != QED_ROCE_QP_STATE_ERR)
4096 			DP_DEBUG(dev, QEDR_MSG_CQ,
4097 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4098 				 cq->icid, qp->icid);
4099 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4100 				  IB_WC_WR_FLUSH_ERR, 1);
4101 		break;
4102 	default:
4103 		/* process all WQE before the cosumer */
4104 		qp->state = QED_ROCE_QP_STATE_ERR;
4105 		cnt = process_req(dev, qp, cq, num_entries, wc,
4106 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
4107 		wc += cnt;
4108 		/* if we have extra WC fill it with actual error info */
4109 		if (cnt < num_entries) {
4110 			enum ib_wc_status wc_status;
4111 
4112 			switch (req->status) {
4113 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4114 				DP_ERR(dev,
4115 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4116 				       cq->icid, qp->icid);
4117 				wc_status = IB_WC_BAD_RESP_ERR;
4118 				break;
4119 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4120 				DP_ERR(dev,
4121 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4122 				       cq->icid, qp->icid);
4123 				wc_status = IB_WC_LOC_LEN_ERR;
4124 				break;
4125 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4126 				DP_ERR(dev,
4127 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4128 				       cq->icid, qp->icid);
4129 				wc_status = IB_WC_LOC_QP_OP_ERR;
4130 				break;
4131 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4132 				DP_ERR(dev,
4133 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4134 				       cq->icid, qp->icid);
4135 				wc_status = IB_WC_LOC_PROT_ERR;
4136 				break;
4137 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4138 				DP_ERR(dev,
4139 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4140 				       cq->icid, qp->icid);
4141 				wc_status = IB_WC_MW_BIND_ERR;
4142 				break;
4143 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4144 				DP_ERR(dev,
4145 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4146 				       cq->icid, qp->icid);
4147 				wc_status = IB_WC_REM_INV_REQ_ERR;
4148 				break;
4149 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4150 				DP_ERR(dev,
4151 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4152 				       cq->icid, qp->icid);
4153 				wc_status = IB_WC_REM_ACCESS_ERR;
4154 				break;
4155 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4156 				DP_ERR(dev,
4157 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4158 				       cq->icid, qp->icid);
4159 				wc_status = IB_WC_REM_OP_ERR;
4160 				break;
4161 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4162 				DP_ERR(dev,
4163 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4164 				       cq->icid, qp->icid);
4165 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4166 				break;
4167 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4168 				DP_ERR(dev,
4169 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4170 				       cq->icid, qp->icid);
4171 				wc_status = IB_WC_RETRY_EXC_ERR;
4172 				break;
4173 			default:
4174 				DP_ERR(dev,
4175 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4176 				       cq->icid, qp->icid);
4177 				wc_status = IB_WC_GENERAL_ERR;
4178 			}
4179 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4180 					   wc_status, 1);
4181 		}
4182 	}
4183 
4184 	return cnt;
4185 }
4186 
4187 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4188 {
4189 	switch (status) {
4190 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4191 		return IB_WC_LOC_ACCESS_ERR;
4192 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4193 		return IB_WC_LOC_LEN_ERR;
4194 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4195 		return IB_WC_LOC_QP_OP_ERR;
4196 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4197 		return IB_WC_LOC_PROT_ERR;
4198 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4199 		return IB_WC_MW_BIND_ERR;
4200 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4201 		return IB_WC_REM_INV_RD_REQ_ERR;
4202 	case RDMA_CQE_RESP_STS_OK:
4203 		return IB_WC_SUCCESS;
4204 	default:
4205 		return IB_WC_GENERAL_ERR;
4206 	}
4207 }
4208 
4209 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4210 					  struct ib_wc *wc)
4211 {
4212 	wc->status = IB_WC_SUCCESS;
4213 	wc->byte_len = le32_to_cpu(resp->length);
4214 
4215 	if (resp->flags & QEDR_RESP_IMM) {
4216 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4217 		wc->wc_flags |= IB_WC_WITH_IMM;
4218 
4219 		if (resp->flags & QEDR_RESP_RDMA)
4220 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4221 
4222 		if (resp->flags & QEDR_RESP_INV)
4223 			return -EINVAL;
4224 
4225 	} else if (resp->flags & QEDR_RESP_INV) {
4226 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4227 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4228 
4229 		if (resp->flags & QEDR_RESP_RDMA)
4230 			return -EINVAL;
4231 
4232 	} else if (resp->flags & QEDR_RESP_RDMA) {
4233 		return -EINVAL;
4234 	}
4235 
4236 	return 0;
4237 }
4238 
4239 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4240 			       struct qedr_cq *cq, struct ib_wc *wc,
4241 			       struct rdma_cqe_responder *resp, u64 wr_id)
4242 {
4243 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4244 	wc->opcode = IB_WC_RECV;
4245 	wc->wc_flags = 0;
4246 
4247 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4248 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4249 			DP_ERR(dev,
4250 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4251 			       cq, cq->icid, resp->flags);
4252 
4253 	} else {
4254 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4255 		if (wc->status == IB_WC_GENERAL_ERR)
4256 			DP_ERR(dev,
4257 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4258 			       cq, cq->icid, resp->status);
4259 	}
4260 
4261 	/* Fill the rest of the WC */
4262 	wc->vendor_err = 0;
4263 	wc->src_qp = qp->id;
4264 	wc->qp = &qp->ibqp;
4265 	wc->wr_id = wr_id;
4266 }
4267 
4268 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4269 				struct qedr_cq *cq, struct ib_wc *wc,
4270 				struct rdma_cqe_responder *resp)
4271 {
4272 	struct qedr_srq *srq = qp->srq;
4273 	u64 wr_id;
4274 
4275 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4276 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4277 
4278 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4279 		wc->status = IB_WC_WR_FLUSH_ERR;
4280 		wc->vendor_err = 0;
4281 		wc->wr_id = wr_id;
4282 		wc->byte_len = 0;
4283 		wc->src_qp = qp->id;
4284 		wc->qp = &qp->ibqp;
4285 		wc->wr_id = wr_id;
4286 	} else {
4287 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4288 	}
4289 	atomic_inc(&srq->hw_srq.wr_cons_cnt);
4290 
4291 	return 1;
4292 }
4293 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4294 			    struct qedr_cq *cq, struct ib_wc *wc,
4295 			    struct rdma_cqe_responder *resp)
4296 {
4297 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4298 
4299 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4300 
4301 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4302 		qed_chain_consume(&qp->rq.pbl);
4303 	qedr_inc_sw_cons(&qp->rq);
4304 
4305 	return 1;
4306 }
4307 
4308 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4309 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4310 {
4311 	u16 cnt = 0;
4312 
4313 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4314 		/* fill WC */
4315 		wc->status = IB_WC_WR_FLUSH_ERR;
4316 		wc->vendor_err = 0;
4317 		wc->wc_flags = 0;
4318 		wc->src_qp = qp->id;
4319 		wc->byte_len = 0;
4320 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4321 		wc->qp = &qp->ibqp;
4322 		num_entries--;
4323 		wc++;
4324 		cnt++;
4325 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4326 			qed_chain_consume(&qp->rq.pbl);
4327 		qedr_inc_sw_cons(&qp->rq);
4328 	}
4329 
4330 	return cnt;
4331 }
4332 
4333 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4334 				 struct rdma_cqe_responder *resp, int *update)
4335 {
4336 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4337 		consume_cqe(cq);
4338 		*update |= 1;
4339 	}
4340 }
4341 
/* Handle one SRQ responder CQE: build its work completion, then consume the
 * CQE unconditionally.
 *
 * Returns the number of work completions written to @wc.
 */
static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
				 struct qedr_cq *cq, int num_entries,
				 struct ib_wc *wc,
				 struct rdma_cqe_responder *resp)
{
	int cnt = process_resp_one_srq(dev, qp, cq, wc, resp);

	consume_cqe(cq);

	return cnt;
}
4354 
4355 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4356 			     struct qedr_cq *cq, int num_entries,
4357 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4358 			     int *update)
4359 {
4360 	int cnt;
4361 
4362 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4363 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4364 					 resp->rq_cons_or_srq_id);
4365 		try_consume_resp_cqe(cq, qp, resp, update);
4366 	} else {
4367 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4368 		consume_cqe(cq);
4369 		*update |= 1;
4370 	}
4371 
4372 	return cnt;
4373 }
4374 
4375 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4376 				struct rdma_cqe_requester *req, int *update)
4377 {
4378 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4379 		consume_cqe(cq);
4380 		*update |= 1;
4381 	}
4382 }
4383 
4384 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4385 {
4386 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4387 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4388 	union rdma_cqe *cqe;
4389 	u32 old_cons, new_cons;
4390 	unsigned long flags;
4391 	int update = 0;
4392 	int done = 0;
4393 
4394 	if (cq->destroyed) {
4395 		DP_ERR(dev,
4396 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4397 		       cq, cq->icid);
4398 		return 0;
4399 	}
4400 
4401 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4402 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4403 
4404 	spin_lock_irqsave(&cq->cq_lock, flags);
4405 	cqe = cq->latest_cqe;
4406 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4407 	while (num_entries && is_valid_cqe(cq, cqe)) {
4408 		struct qedr_qp *qp;
4409 		int cnt = 0;
4410 
4411 		/* prevent speculative reads of any field of CQE */
4412 		rmb();
4413 
4414 		qp = cqe_get_qp(cqe);
4415 		if (!qp) {
4416 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4417 			break;
4418 		}
4419 
4420 		wc->qp = &qp->ibqp;
4421 
4422 		switch (cqe_get_type(cqe)) {
4423 		case RDMA_CQE_TYPE_REQUESTER:
4424 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4425 					       &cqe->req);
4426 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4427 			break;
4428 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4429 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4430 						&cqe->resp, &update);
4431 			break;
4432 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4433 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4434 						    wc, &cqe->resp);
4435 			update = 1;
4436 			break;
4437 		case RDMA_CQE_TYPE_INVALID:
4438 		default:
4439 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4440 			       cqe_get_type(cqe));
4441 		}
4442 		num_entries -= cnt;
4443 		wc += cnt;
4444 		done += cnt;
4445 
4446 		cqe = get_cqe(cq);
4447 	}
4448 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4449 
4450 	cq->cq_cons += new_cons - old_cons;
4451 
4452 	if (update)
4453 		/* doorbell notifies abount latest VALID entry,
4454 		 * but chain already point to the next INVALID one
4455 		 */
4456 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4457 
4458 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4459 	return done;
4460 }
4461 
4462 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4463 		     u8 port_num, const struct ib_wc *in_wc,
4464 		     const struct ib_grh *in_grh, const struct ib_mad *in,
4465 		     struct ib_mad *out_mad, size_t *out_mad_size,
4466 		     u16 *out_mad_pkey_index)
4467 {
4468 	return IB_MAD_RESULT_SUCCESS;
4469 }
4470