xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 15506586)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
55 
56 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
57 #define	RDMA_MAX_SGE_PER_SRQ	(4)
58 #define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
59 
60 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
61 
62 enum {
63 	QEDR_USER_MMAP_IO_WC = 0,
64 	QEDR_USER_MMAP_PHYS_PAGE,
65 };
66 
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
68 					size_t len)
69 {
70 	size_t min_len = min_t(size_t, len, udata->outlen);
71 
72 	return ib_copy_to_udata(udata, src, min_len);
73 }
74 
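/* RoCE exposes a single default P_Key; only the table index is validated. */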
75 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
76 {
77 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
78 		return -EINVAL;
79 
80 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
81 	return 0;
82 }
83 
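/* For iWARP the GID is derived directly from the netdev's MAC address. */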
84 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
85 		      int index, union ib_gid *sgid)
86 {
87 	struct qedr_dev *dev = get_qedr_dev(ibdev);
88 
89 	memset(sgid->raw, 0, sizeof(sgid->raw));
90 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
91 
92 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 		 sgid->global.interface_id, sgid->global.subnet_prefix);
94 
95 	return 0;
96 }
97 
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
99 {
100 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 	struct qedr_device_attr *qattr = &dev->attr;
102 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
103 
104 	srq_attr->srq_limit = srq->srq_limit;
105 	srq_attr->max_wr = qattr->max_srq_wr;
106 	srq_attr->max_sge = qattr->max_sge;
107 
108 	return 0;
109 }
110 
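/* Report device capabilities from the attributes cached in dev->attr. */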
111 int qedr_query_device(struct ib_device *ibdev,
112 		      struct ib_device_attr *attr, struct ib_udata *udata)
113 {
114 	struct qedr_dev *dev = get_qedr_dev(ibdev);
115 	struct qedr_device_attr *qattr = &dev->attr;
116 
117 	if (!dev->rdma_ctx) {
118 		DP_ERR(dev,
119 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
120 		       dev->rdma_ctx);
121 		return -EINVAL;
122 	}
123 
124 	memset(attr, 0, sizeof(*attr));
125 
126 	attr->fw_ver = qattr->fw_ver;
127 	attr->sys_image_guid = qattr->sys_image_guid;
128 	attr->max_mr_size = qattr->max_mr_size;
129 	attr->page_size_cap = qattr->page_size_caps;
130 	attr->vendor_id = qattr->vendor_id;
131 	attr->vendor_part_id = qattr->vendor_part_id;
132 	attr->hw_ver = qattr->hw_ver;
133 	attr->max_qp = qattr->max_qp;
134 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 	    IB_DEVICE_RC_RNR_NAK_GEN |
137 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
138 
139 	attr->max_send_sge = qattr->max_sge;
140 	attr->max_recv_sge = qattr->max_sge;
141 	attr->max_sge_rd = qattr->max_sge;
142 	attr->max_cq = qattr->max_cq;
143 	attr->max_cqe = qattr->max_cqe;
144 	attr->max_mr = qattr->max_mr;
145 	attr->max_mw = qattr->max_mw;
146 	attr->max_pd = qattr->max_pd;
147 	attr->atomic_cap = dev->atomic_cap;
148 	attr->max_qp_init_rd_atom =
149 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
150 	attr->max_qp_rd_atom =
151 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
152 		attr->max_qp_init_rd_atom);
153 
154 	attr->max_srq = qattr->max_srq;
155 	attr->max_srq_sge = qattr->max_srq_sge;
156 	attr->max_srq_wr = qattr->max_srq_wr;
157 
158 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
159 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
160 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
161 	attr->max_ah = qattr->max_ah;
162 
163 	return 0;
164 }
165 
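/*
 * Map an Ethernet link speed in Mbps to the closest IB speed/width pair.
 * Unknown speeds fall back to SDR x1.
 */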
166 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
167 					    u8 *ib_width)
168 {
169 	switch (speed) {
170 	case 1000:
171 		*ib_speed = IB_SPEED_SDR;
172 		*ib_width = IB_WIDTH_1X;
173 		break;
174 	case 10000:
175 		*ib_speed = IB_SPEED_QDR;
176 		*ib_width = IB_WIDTH_1X;
177 		break;
178 
179 	case 20000:
180 		*ib_speed = IB_SPEED_DDR;
181 		*ib_width = IB_WIDTH_4X;
182 		break;
183 
184 	case 25000:
185 		*ib_speed = IB_SPEED_EDR;
186 		*ib_width = IB_WIDTH_1X;
187 		break;
188 
189 	case 40000:
190 		*ib_speed = IB_SPEED_QDR;
191 		*ib_width = IB_WIDTH_4X;
192 		break;
193 
194 	case 50000:
195 		*ib_speed = IB_SPEED_HDR;
196 		*ib_width = IB_WIDTH_1X;
197 		break;
198 
199 	case 100000:
200 		*ib_speed = IB_SPEED_EDR;
201 		*ib_width = IB_WIDTH_4X;
202 		break;
203 
204 	default:
205 		/* Unsupported */
206 		*ib_speed = IB_SPEED_SDR;
207 		*ib_width = IB_WIDTH_1X;
208 	}
209 }
210 
211 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
212 {
213 	struct qedr_dev *dev;
214 	struct qed_rdma_port *rdma_port;
215 
216 	dev = get_qedr_dev(ibdev);
217 
218 	if (!dev->rdma_ctx) {
219 		DP_ERR(dev, "rdma_ctx is NULL\n");
220 		return -EINVAL;
221 	}
222 
223 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
224 
225 	/* *attr is zeroed by the caller; avoid zeroing it again here */
226 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
227 		attr->state = IB_PORT_ACTIVE;
228 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
229 	} else {
230 		attr->state = IB_PORT_DOWN;
231 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
232 	}
233 	attr->max_mtu = IB_MTU_4096;
234 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
235 	attr->lid = 0;
236 	attr->lmc = 0;
237 	attr->sm_lid = 0;
238 	attr->sm_sl = 0;
239 	attr->ip_gids = true;
240 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
241 		attr->gid_tbl_len = 1;
242 		attr->pkey_tbl_len = 1;
243 	} else {
244 		attr->gid_tbl_len = QEDR_MAX_SGID;
245 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
246 	}
247 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
248 	attr->qkey_viol_cntr = 0;
249 	get_link_speed_and_width(rdma_port->link_speed,
250 				 &attr->active_speed, &attr->active_width);
251 	attr->max_msg_sz = rdma_port->max_msg_size;
252 	attr->max_vl_num = 4;
253 
254 	return 0;
255 }
256 
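/*
 * Allocate a per-process user context: reserve a DPI (doorbell space) in the
 * device, publish it to user space through an rdma mmap entry, and return the
 * queue/CQ limits and DPM capabilities in the response.
 */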
257 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
258 {
259 	struct ib_device *ibdev = uctx->device;
260 	int rc;
261 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
262 	struct qedr_alloc_ucontext_resp uresp = {};
263 	struct qedr_alloc_ucontext_req ureq = {};
264 	struct qedr_dev *dev = get_qedr_dev(ibdev);
265 	struct qed_rdma_add_user_out_params oparams;
266 	struct qedr_user_mmap_entry *entry;
267 
268 	if (!udata)
269 		return -EFAULT;
270 
271 	if (udata->inlen) {
272 		rc = ib_copy_from_udata(&ureq, udata,
273 					min(sizeof(ureq), udata->inlen));
274 		if (rc) {
275 			DP_ERR(dev, "Problem copying data from user space\n");
276 			return -EFAULT;
277 		}
278 
279 		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
280 	}
281 
282 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
283 	if (rc) {
284 		DP_ERR(dev,
285 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
286 		       rc);
287 		return rc;
288 	}
289 
290 	ctx->dpi = oparams.dpi;
291 	ctx->dpi_addr = oparams.dpi_addr;
292 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
293 	ctx->dpi_size = oparams.dpi_size;
294 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
295 	if (!entry) {
296 		rc = -ENOMEM;
297 		goto err;
298 	}
299 
300 	entry->io_address = ctx->dpi_phys_addr;
301 	entry->length = ctx->dpi_size;
302 	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
303 	entry->dpi = ctx->dpi;
304 	entry->dev = dev;
305 	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
306 					 ctx->dpi_size);
307 	if (rc) {
308 		kfree(entry);
309 		goto err;
310 	}
311 	ctx->db_mmap_entry = &entry->rdma_entry;
312 
313 	if (!dev->user_dpm_enabled)
314 		uresp.dpm_flags = 0;
315 	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
316 		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
317 	else
318 		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
319 				  QEDR_DPM_TYPE_ROCE_LEGACY;
320 
321 	uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
322 	uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
323 	uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
324 
325 	uresp.wids_enabled = 1;
326 	uresp.wid_count = oparams.wid_count;
327 	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
328 	uresp.db_size = ctx->dpi_size;
329 	uresp.max_send_wr = dev->attr.max_sqe;
330 	uresp.max_recv_wr = dev->attr.max_rqe;
331 	uresp.max_srq_wr = dev->attr.max_srq_wr;
332 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
333 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
334 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
335 	uresp.max_cqes = QEDR_MAX_CQES;
336 
337 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
338 	if (rc)
339 		goto err;
340 
341 	ctx->dev = dev;
342 
343 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
344 		 &ctx->ibucontext);
345 	return 0;
346 
347 err:
348 	if (!ctx->db_mmap_entry)
349 		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
350 	else
351 		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
352 
353 	return rc;
354 }
355 
356 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
357 {
358 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
359 
360 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
361 		 uctx);
362 
363 	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
364 }
365 
366 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
367 {
368 	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
369 	struct qedr_dev *dev = entry->dev;
370 
371 	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
372 		free_page((unsigned long)entry->address);
373 	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
374 		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
375 
376 	kfree(entry);
377 }
378 
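/*
 * mmap handler: look up the entry registered for this offset and map either
 * the write-combined doorbell window or a doorbell-recovery page.
 */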
379 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
380 {
381 	struct ib_device *dev = ucontext->device;
382 	size_t length = vma->vm_end - vma->vm_start;
383 	struct rdma_user_mmap_entry *rdma_entry;
384 	struct qedr_user_mmap_entry *entry;
385 	int rc = 0;
386 	u64 pfn;
387 
388 	ibdev_dbg(dev,
389 		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
390 		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
391 
392 	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
393 	if (!rdma_entry) {
394 		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
395 			  vma->vm_pgoff);
396 		return -EINVAL;
397 	}
398 	entry = get_qedr_mmap_entry(rdma_entry);
399 	ibdev_dbg(dev,
400 		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
401 		  entry->io_address, length, entry->mmap_flag);
402 
403 	switch (entry->mmap_flag) {
404 	case QEDR_USER_MMAP_IO_WC:
405 		pfn = entry->io_address >> PAGE_SHIFT;
406 		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
407 				       pgprot_writecombine(vma->vm_page_prot),
408 				       rdma_entry);
409 		break;
410 	case QEDR_USER_MMAP_PHYS_PAGE:
411 		rc = vm_insert_page(vma, vma->vm_start,
412 				    virt_to_page(entry->address));
413 		break;
414 	default:
415 		rc = -EINVAL;
416 	}
417 
418 	if (rc)
419 		ibdev_dbg(dev,
420 			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
421 			  entry->io_address, length, entry->mmap_flag, rc);
422 
423 	rdma_user_mmap_entry_put(rdma_entry);
424 	return rc;
425 }
426 
427 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
428 {
429 	struct ib_device *ibdev = ibpd->device;
430 	struct qedr_dev *dev = get_qedr_dev(ibdev);
431 	struct qedr_pd *pd = get_qedr_pd(ibpd);
432 	u16 pd_id;
433 	int rc;
434 
435 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
436 		 udata ? "User Lib" : "Kernel");
437 
438 	if (!dev->rdma_ctx) {
439 		DP_ERR(dev, "invalid RDMA context\n");
440 		return -EINVAL;
441 	}
442 
443 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
444 	if (rc)
445 		return rc;
446 
447 	pd->pd_id = pd_id;
448 
449 	if (udata) {
450 		struct qedr_alloc_pd_uresp uresp = {
451 			.pd_id = pd_id,
452 		};
453 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
454 			udata, struct qedr_ucontext, ibucontext);
455 
456 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
457 		if (rc) {
458 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
459 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
460 			return rc;
461 		}
462 
463 		pd->uctx = context;
464 		pd->uctx->pd = pd;
465 	}
466 
467 	return 0;
468 }
469 
470 void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
471 {
472 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
473 	struct qedr_pd *pd = get_qedr_pd(ibpd);
474 
475 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
476 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
477 }
478 
479 static void qedr_free_pbl(struct qedr_dev *dev,
480 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
481 {
482 	struct pci_dev *pdev = dev->pdev;
483 	int i;
484 
485 	for (i = 0; i < pbl_info->num_pbls; i++) {
486 		if (!pbl[i].va)
487 			continue;
488 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
489 				  pbl[i].va, pbl[i].pa);
490 	}
491 
492 	kfree(pbl);
493 }
494 
495 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
496 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
497 
498 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
499 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
500 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
501 
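/*
 * Allocate the PBL pages. For two-layer PBLs the first page is filled below
 * with the physical addresses of the remaining pages.
 */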
502 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
503 					   struct qedr_pbl_info *pbl_info,
504 					   gfp_t flags)
505 {
506 	struct pci_dev *pdev = dev->pdev;
507 	struct qedr_pbl *pbl_table;
508 	dma_addr_t *pbl_main_tbl;
509 	dma_addr_t pa;
510 	void *va;
511 	int i;
512 
513 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
514 	if (!pbl_table)
515 		return ERR_PTR(-ENOMEM);
516 
517 	for (i = 0; i < pbl_info->num_pbls; i++) {
518 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
519 					flags);
520 		if (!va)
521 			goto err;
522 
523 		pbl_table[i].va = va;
524 		pbl_table[i].pa = pa;
525 	}
526 
527 	/* Two-layer PBLs: if we have more than one PBL, initialize the first
528 	 * one with the physical addresses of all the rest.
529 	 */
530 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
531 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
532 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
533 
534 	return pbl_table;
535 
536 err:
537 	for (i--; i >= 0; i--)
538 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
539 				  pbl_table[i].va, pbl_table[i].pa);
540 
541 	qedr_free_pbl(dev, pbl_info, pbl_table);
542 
543 	return ERR_PTR(-ENOMEM);
544 }
545 
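/*
 * Choose between a single- and a two-layer PBL based on the number of page
 * entries (PBEs) needed, and compute the resulting PBL page size and count.
 */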
546 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
547 				struct qedr_pbl_info *pbl_info,
548 				u32 num_pbes, int two_layer_capable)
549 {
550 	u32 pbl_capacity;
551 	u32 pbl_size;
552 	u32 num_pbls;
553 
554 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
555 		if (num_pbes > MAX_PBES_TWO_LAYER) {
556 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
557 			       num_pbes);
558 			return -EINVAL;
559 		}
560 
561 		/* calculate required pbl page size */
562 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
563 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
564 			       NUM_PBES_ON_PAGE(pbl_size);
565 
566 		while (pbl_capacity < num_pbes) {
567 			pbl_size *= 2;
568 			pbl_capacity = pbl_size / sizeof(u64);
569 			pbl_capacity = pbl_capacity * pbl_capacity;
570 		}
571 
572 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
573 		num_pbls++;	/* One extra for layer 0 (it points to the other PBLs) */
574 		pbl_info->two_layered = true;
575 	} else {
576 		/* One layered PBL */
577 		num_pbls = 1;
578 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
579 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
580 		pbl_info->two_layered = false;
581 	}
582 
583 	pbl_info->num_pbls = num_pbls;
584 	pbl_info->pbl_size = pbl_size;
585 	pbl_info->num_pbes = num_pbes;
586 
587 	DP_DEBUG(dev, QEDR_MSG_MR,
588 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
589 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
590 
591 	return 0;
592 }
593 
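/*
 * Walk the umem scatter list and write one little-endian PBE per firmware
 * page, moving to the next PBL page whenever the current one fills up.
 */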
594 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
595 			       struct qedr_pbl *pbl,
596 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
597 {
598 	int pbe_cnt, total_num_pbes = 0;
599 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
600 	struct qedr_pbl *pbl_tbl;
601 	struct sg_dma_page_iter sg_iter;
602 	struct regpair *pbe;
603 	u64 pg_addr;
604 
605 	if (!pbl_info->num_pbes)
606 		return;
607 
608 	/* If we have a two-layered PBL, the first PBL points to the rest of
609 	 * the PBLs and the first data entry lives in the second PBL in the table.
610 	 */
611 	if (pbl_info->two_layered)
612 		pbl_tbl = &pbl[1];
613 	else
614 		pbl_tbl = pbl;
615 
616 	pbe = (struct regpair *)pbl_tbl->va;
617 	if (!pbe) {
618 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
619 		return;
620 	}
621 
622 	pbe_cnt = 0;
623 
624 	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
625 
626 	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
627 		pg_addr = sg_page_iter_dma_address(&sg_iter);
628 		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
629 			pbe->lo = cpu_to_le32(pg_addr);
630 			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
631 
632 			pg_addr += BIT(pg_shift);
633 			pbe_cnt++;
634 			total_num_pbes++;
635 			pbe++;
636 
637 			if (total_num_pbes == pbl_info->num_pbes)
638 				return;
639 
640 			/* If the current PBL page is full of PBEs,
641 			 * move to the next one.
642 			 */
643 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
644 				pbl_tbl++;
645 				pbe = (struct regpair *)pbl_tbl->va;
646 				pbe_cnt = 0;
647 			}
648 
649 			fw_pg_cnt++;
650 		}
651 	}
652 }
653 
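/*
 * Register a doorbell address/data pair with the qed doorbell recovery
 * mechanism. Skipped when the user library provided no recovery data.
 */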
654 static int qedr_db_recovery_add(struct qedr_dev *dev,
655 				void __iomem *db_addr,
656 				void *db_data,
657 				enum qed_db_rec_width db_width,
658 				enum qed_db_rec_space db_space)
659 {
660 	if (!db_data) {
661 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
662 		return 0;
663 	}
664 
665 	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
666 						 db_width, db_space);
667 }
668 
669 static void qedr_db_recovery_del(struct qedr_dev *dev,
670 				 void __iomem *db_addr,
671 				 void *db_data)
672 {
673 	if (!db_data) {
674 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
675 		return;
676 	}
677 
678 	/* Ignore return code as there is not much we can do about it. Error
679 	 * log will be printed inside.
680 	 */
681 	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
682 }
683 
684 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
685 			      struct qedr_cq *cq, struct ib_udata *udata,
686 			      u32 db_offset)
687 {
688 	struct qedr_create_cq_uresp uresp;
689 	int rc;
690 
691 	memset(&uresp, 0, sizeof(uresp));
692 
693 	uresp.db_offset = db_offset;
694 	uresp.icid = cq->icid;
695 	if (cq->q.db_mmap_entry)
696 		uresp.db_rec_addr =
697 			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
698 
699 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
700 	if (rc)
701 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
702 
703 	return rc;
704 }
705 
706 static void consume_cqe(struct qedr_cq *cq)
707 {
708 	if (cq->latest_cqe == cq->toggle_cqe)
709 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
710 
711 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
712 }
713 
714 static inline int qedr_align_cq_entries(int entries)
715 {
716 	u64 size, aligned_size;
717 
718 	/* We allocate an extra entry that we don't report to the FW. */
719 	size = (entries + 1) * QEDR_CQE_SIZE;
720 	aligned_size = ALIGN(size, PAGE_SIZE);
721 
722 	return aligned_size / QEDR_CQE_SIZE;
723 }
724 
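/*
 * Allocate a kernel page that user space mmaps and uses to mirror its last
 * doorbell value, so the qed core can replay doorbells after a recovery.
 */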
725 static int qedr_init_user_db_rec(struct ib_udata *udata,
726 				 struct qedr_dev *dev, struct qedr_userq *q,
727 				 bool requires_db_rec)
728 {
729 	struct qedr_ucontext *uctx =
730 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
731 					  ibucontext);
732 	struct qedr_user_mmap_entry *entry;
733 	int rc;
734 
735 	/* Skip queues that need no doorbell (SRQ) and libs without recovery support */
736 	if (requires_db_rec == 0 || !uctx->db_rec)
737 		return 0;
738 
739 	/* Allocate a page for doorbell recovery, add to mmap */
740 	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
741 	if (!q->db_rec_data) {
742 		DP_ERR(dev, "get_zeroed_page failed\n");
743 		return -ENOMEM;
744 	}
745 
746 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
747 	if (!entry)
748 		goto err_free_db_data;
749 
750 	entry->address = q->db_rec_data;
751 	entry->length = PAGE_SIZE;
752 	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
753 	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
754 					 &entry->rdma_entry,
755 					 PAGE_SIZE);
756 	if (rc)
757 		goto err_free_entry;
758 
759 	q->db_mmap_entry = &entry->rdma_entry;
760 
761 	return 0;
762 
763 err_free_entry:
764 	kfree(entry);
765 
766 err_free_db_data:
767 	free_page((unsigned long)q->db_rec_data);
768 	q->db_rec_data = NULL;
769 	return -ENOMEM;
770 }
771 
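/*
 * Pin a user-space queue buffer, build its PBL (or only allocate the table
 * when the caller fills it in later), and optionally set up the doorbell
 * recovery page.
 */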
772 static inline int qedr_init_user_queue(struct ib_udata *udata,
773 				       struct qedr_dev *dev,
774 				       struct qedr_userq *q, u64 buf_addr,
775 				       size_t buf_len, bool requires_db_rec,
776 				       int access,
777 				       int alloc_and_init)
778 {
779 	u32 fw_pages;
780 	int rc;
781 
782 	q->buf_addr = buf_addr;
783 	q->buf_len = buf_len;
784 	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
785 	if (IS_ERR(q->umem)) {
786 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
787 		       PTR_ERR(q->umem));
788 		return PTR_ERR(q->umem);
789 	}
790 
791 	fw_pages = ib_umem_page_count(q->umem) <<
792 	    (PAGE_SHIFT - FW_PAGE_SHIFT);
793 
794 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
795 	if (rc)
796 		goto err0;
797 
798 	if (alloc_and_init) {
799 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
800 		if (IS_ERR(q->pbl_tbl)) {
801 			rc = PTR_ERR(q->pbl_tbl);
802 			goto err0;
803 		}
804 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
805 				   FW_PAGE_SHIFT);
806 	} else {
807 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
808 		if (!q->pbl_tbl) {
809 			rc = -ENOMEM;
810 			goto err0;
811 		}
812 	}
813 
814 	/* mmap the user address used to store doorbell data for recovery */
815 	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
816 
817 err0:
818 	ib_umem_release(q->umem);
819 	q->umem = NULL;
820 
821 	return rc;
822 }
823 
824 static inline void qedr_init_cq_params(struct qedr_cq *cq,
825 				       struct qedr_ucontext *ctx,
826 				       struct qedr_dev *dev, int vector,
827 				       int chain_entries, int page_cnt,
828 				       u64 pbl_ptr,
829 				       struct qed_rdma_create_cq_in_params
830 				       *params)
831 {
832 	memset(params, 0, sizeof(*params));
833 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
834 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
835 	params->cnq_id = vector;
836 	params->cq_size = chain_entries - 1;
837 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
838 	params->pbl_num_pages = page_cnt;
839 	params->pbl_ptr = pbl_ptr;
840 	params->pbl_two_level = 0;
841 }
842 
843 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
844 {
845 	cq->db.data.agg_flags = flags;
846 	cq->db.data.value = cpu_to_le32(cons);
847 	writeq(cq->db.raw, cq->db_addr);
848 }
849 
850 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
851 {
852 	struct qedr_cq *cq = get_qedr_cq(ibcq);
853 	unsigned long sflags;
854 	struct qedr_dev *dev;
855 
856 	dev = get_qedr_dev(ibcq->device);
857 
858 	if (cq->destroyed) {
859 		DP_ERR(dev,
860 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
861 		       cq, cq->icid);
862 		return -EINVAL;
863 	}
864 
865 
866 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
867 		return 0;
868 
869 	spin_lock_irqsave(&cq->cq_lock, sflags);
870 
871 	cq->arm_flags = 0;
872 
873 	if (flags & IB_CQ_SOLICITED)
874 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
875 
876 	if (flags & IB_CQ_NEXT_COMP)
877 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
878 
879 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
880 
881 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
882 
883 	return 0;
884 }
885 
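/*
 * Create a CQ. For user CQs the CQE ring lives in pinned user memory and is
 * described by a PBL; for kernel CQs a qed chain is allocated. The CQ is then
 * created in the firmware and its doorbell is registered for recovery.
 */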
886 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
887 		   struct ib_udata *udata)
888 {
889 	struct ib_device *ibdev = ibcq->device;
890 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
891 		udata, struct qedr_ucontext, ibucontext);
892 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
893 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
894 	struct qed_chain_init_params chain_params = {
895 		.mode		= QED_CHAIN_MODE_PBL,
896 		.intended_use	= QED_CHAIN_USE_TO_CONSUME,
897 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
898 		.elem_size	= sizeof(union rdma_cqe),
899 	};
900 	struct qedr_dev *dev = get_qedr_dev(ibdev);
901 	struct qed_rdma_create_cq_in_params params;
902 	struct qedr_create_cq_ureq ureq = {};
903 	int vector = attr->comp_vector;
904 	int entries = attr->cqe;
905 	struct qedr_cq *cq = get_qedr_cq(ibcq);
906 	int chain_entries;
907 	u32 db_offset;
908 	int page_cnt;
909 	u64 pbl_ptr;
910 	u16 icid;
911 	int rc;
912 
913 	DP_DEBUG(dev, QEDR_MSG_INIT,
914 		 "create_cq: called from %s. entries=%d, vector=%d\n",
915 		 udata ? "User Lib" : "Kernel", entries, vector);
916 
917 	if (entries > QEDR_MAX_CQES) {
918 		DP_ERR(dev,
919 		       "create cq: the number of entries %d is too high. Must be equal to or below %d.\n",
920 		       entries, QEDR_MAX_CQES);
921 		return -EINVAL;
922 	}
923 
924 	chain_entries = qedr_align_cq_entries(entries);
925 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
926 	chain_params.num_elems = chain_entries;
927 
928 	/* calc db offset. user will add DPI base, kernel will add db addr */
929 	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
930 
931 	if (udata) {
932 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
933 							 udata->inlen))) {
934 			DP_ERR(dev,
935 			       "create cq: problem copying data from user space\n");
936 			goto err0;
937 		}
938 
939 		if (!ureq.len) {
940 			DP_ERR(dev,
941 			       "create cq: cannot create a cq with 0 entries\n");
942 			goto err0;
943 		}
944 
945 		cq->cq_type = QEDR_CQ_TYPE_USER;
946 
947 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
948 					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
949 					  1);
950 		if (rc)
951 			goto err0;
952 
953 		pbl_ptr = cq->q.pbl_tbl->pa;
954 		page_cnt = cq->q.pbl_info.num_pbes;
955 
956 		cq->ibcq.cqe = chain_entries;
957 		cq->q.db_addr = ctx->dpi_addr + db_offset;
958 	} else {
959 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
960 
961 		rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
962 						   &chain_params);
963 		if (rc)
964 			goto err0;
965 
966 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
967 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
968 		cq->ibcq.cqe = cq->pbl.capacity;
969 	}
970 
971 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
972 			    pbl_ptr, &params);
973 
974 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
975 	if (rc)
976 		goto err1;
977 
978 	cq->icid = icid;
979 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
980 	spin_lock_init(&cq->cq_lock);
981 
982 	if (udata) {
983 		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
984 		if (rc)
985 			goto err2;
986 
987 		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
988 					  &cq->q.db_rec_data->db_data,
989 					  DB_REC_WIDTH_64B,
990 					  DB_REC_USER);
991 		if (rc)
992 			goto err2;
993 
994 	} else {
995 		/* Generate doorbell address. */
996 		cq->db.data.icid = cq->icid;
997 		cq->db_addr = dev->db_addr + db_offset;
998 		cq->db.data.params = DB_AGG_CMD_SET <<
999 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1000 
1001 		/* Point to the very last element; consuming past it flips the toggle bit */
1002 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1003 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1004 		cq->latest_cqe = NULL;
1005 		consume_cqe(cq);
1006 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1007 
1008 		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1009 					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
1010 		if (rc)
1011 			goto err2;
1012 	}
1013 
1014 	DP_DEBUG(dev, QEDR_MSG_CQ,
1015 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1016 		 cq->icid, cq, params.cq_size);
1017 
1018 	return 0;
1019 
1020 err2:
1021 	destroy_iparams.icid = cq->icid;
1022 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1023 				  &destroy_oparams);
1024 err1:
1025 	if (udata) {
1026 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1027 		ib_umem_release(cq->q.umem);
1028 		if (cq->q.db_mmap_entry)
1029 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1030 	} else {
1031 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1032 	}
1033 err0:
1034 	return -EINVAL;
1035 }
1036 
1037 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1038 {
1039 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1040 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1041 
1042 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1043 
1044 	return 0;
1045 }
1046 
1047 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1048 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1049 
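/*
 * Destroy a CQ and wait, first spinning then sleeping, until all outstanding
 * CNQ notifications for it have been processed, so the IRQ handler never
 * touches a stale CQ.
 */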
1050 void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1051 {
1052 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1053 	struct qed_rdma_destroy_cq_out_params oparams;
1054 	struct qed_rdma_destroy_cq_in_params iparams;
1055 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1056 	int iter;
1057 
1058 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1059 
1060 	cq->destroyed = 1;
1061 
1062 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1063 	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1064 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1065 		return;
1066 	}
1067 
1068 	iparams.icid = cq->icid;
1069 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1070 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1071 
1072 	if (udata) {
1073 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1074 		ib_umem_release(cq->q.umem);
1075 
1076 		if (cq->q.db_rec_data) {
1077 			qedr_db_recovery_del(dev, cq->q.db_addr,
1078 					     &cq->q.db_rec_data->db_data);
1079 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1080 		}
1081 	} else {
1082 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1083 	}
1084 
1085 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1086 	 * wait until all CNQ interrupts, if any, are received. This will always
1087 	 * happen and will always happen very fast. If not, then a serious error
1088 	 * has occurred. That is why we can use a long delay.
1089 	 * We spin for a short time so we don't lose time on context switching
1090 	 * in case all the completions are handled in that span. Otherwise
1091 	 * we sleep for a while and check again. Since the CNQ may be
1092 	 * associated with (only) the current CPU we use msleep to allow the
1093 	 * current CPU to be freed.
1094 	 * The CNQ notification is increased in qedr_irq_handler().
1095 	 */
1096 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1097 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1098 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1099 		iter--;
1100 	}
1101 
1102 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1103 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1104 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1105 		iter--;
1106 	}
1107 
1108 	/* Note that we don't need to have explicit code to wait for the
1109 	 * completion of the event handler because it is invoked from the EQ.
1110 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1111 	 * be certain that there's no event handler in process.
1112 	 */
1113 }
1114 
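/*
 * Derive the RoCE mode (v1, v2/IPv4 or v2/IPv6), VLAN and source/destination
 * GIDs for a modify-QP request from the AH attribute's SGID entry.
 */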
1115 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1116 					  struct ib_qp_attr *attr,
1117 					  int attr_mask,
1118 					  struct qed_rdma_modify_qp_in_params
1119 					  *qp_params)
1120 {
1121 	const struct ib_gid_attr *gid_attr;
1122 	enum rdma_network_type nw_type;
1123 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1124 	u32 ipv4_addr;
1125 	int ret;
1126 	int i;
1127 
1128 	gid_attr = grh->sgid_attr;
1129 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1130 	if (ret)
1131 		return ret;
1132 
1133 	nw_type = rdma_gid_attr_network_type(gid_attr);
1134 	switch (nw_type) {
1135 	case RDMA_NETWORK_IPV6:
1136 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1137 		       sizeof(qp_params->sgid));
1138 		memcpy(&qp_params->dgid.bytes[0],
1139 		       &grh->dgid,
1140 		       sizeof(qp_params->dgid));
1141 		qp_params->roce_mode = ROCE_V2_IPV6;
1142 		SET_FIELD(qp_params->modify_flags,
1143 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1144 		break;
1145 	case RDMA_NETWORK_IB:
1146 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1147 		       sizeof(qp_params->sgid));
1148 		memcpy(&qp_params->dgid.bytes[0],
1149 		       &grh->dgid,
1150 		       sizeof(qp_params->dgid));
1151 		qp_params->roce_mode = ROCE_V1;
1152 		break;
1153 	case RDMA_NETWORK_IPV4:
1154 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1155 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1156 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1157 		qp_params->sgid.ipv4_addr = ipv4_addr;
1158 		ipv4_addr =
1159 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1160 		qp_params->dgid.ipv4_addr = ipv4_addr;
1161 		SET_FIELD(qp_params->modify_flags,
1162 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1163 		qp_params->roce_mode = ROCE_V2_IPV4;
1164 		break;
1165 	}
1166 
1167 	for (i = 0; i < 4; i++) {
1168 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1169 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1170 	}
1171 
1172 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1173 		qp_params->vlan_id = 0;
1174 
1175 	return 0;
1176 }
1177 
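/*
 * Validate create-QP attributes against the device limits. Only RC and GSI
 * QPs are supported.
 */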
1178 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1179 			       struct ib_qp_init_attr *attrs,
1180 			       struct ib_udata *udata)
1181 {
1182 	struct qedr_device_attr *qattr = &dev->attr;
1183 
1184 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1185 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1186 		DP_DEBUG(dev, QEDR_MSG_QP,
1187 			 "create qp: unsupported qp type=0x%x requested\n",
1188 			 attrs->qp_type);
1189 		return -EOPNOTSUPP;
1190 	}
1191 
1192 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1193 		DP_ERR(dev,
1194 		       "create qp: cannot create an SQ with %d elements (max_send_wr=0x%x)\n",
1195 		       attrs->cap.max_send_wr, qattr->max_sqe);
1196 		return -EINVAL;
1197 	}
1198 
1199 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1200 		DP_ERR(dev,
1201 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1202 		       attrs->cap.max_inline_data, qattr->max_inline);
1203 		return -EINVAL;
1204 	}
1205 
1206 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1207 		DP_ERR(dev,
1208 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1209 		       attrs->cap.max_send_sge, qattr->max_sge);
1210 		return -EINVAL;
1211 	}
1212 
1213 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1214 		DP_ERR(dev,
1215 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1216 		       attrs->cap.max_recv_sge, qattr->max_sge);
1217 		return -EINVAL;
1218 	}
1219 
1220 	/* Unprivileged user space cannot create special QP */
1221 	if (udata && attrs->qp_type == IB_QPT_GSI) {
1222 		DP_ERR(dev,
1223 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1224 		       attrs->qp_type);
1225 		return -EINVAL;
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1232 			       struct qedr_srq *srq, struct ib_udata *udata)
1233 {
1234 	struct qedr_create_srq_uresp uresp = {};
1235 	int rc;
1236 
1237 	uresp.srq_id = srq->srq_id;
1238 
1239 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1240 	if (rc)
1241 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1242 
1243 	return rc;
1244 }
1245 
1246 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1247 			      struct qedr_create_qp_uresp *uresp,
1248 			      struct qedr_qp *qp)
1249 {
1250 	/* iWARP requires two doorbells per RQ. */
1251 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1252 		uresp->rq_db_offset =
1253 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1254 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1255 	} else {
1256 		uresp->rq_db_offset =
1257 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1258 	}
1259 
1260 	uresp->rq_icid = qp->icid;
1261 	if (qp->urq.db_mmap_entry)
1262 		uresp->rq_db_rec_addr =
1263 			rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
1264 }
1265 
1266 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1267 			       struct qedr_create_qp_uresp *uresp,
1268 			       struct qedr_qp *qp)
1269 {
1270 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1271 
1272 	/* iWARP uses the same cid for rq and sq */
1273 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1274 		uresp->sq_icid = qp->icid;
1275 	else
1276 		uresp->sq_icid = qp->icid + 1;
1277 
1278 	if (qp->usq.db_mmap_entry)
1279 		uresp->sq_db_rec_addr =
1280 			rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
1281 }
1282 
1283 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1284 			      struct qedr_qp *qp, struct ib_udata *udata,
1285 			      struct qedr_create_qp_uresp *uresp)
1286 {
1287 	int rc;
1288 
1289 	memset(uresp, 0, sizeof(*uresp));
1290 	qedr_copy_sq_uresp(dev, uresp, qp);
1291 	qedr_copy_rq_uresp(dev, uresp, qp);
1292 
1293 	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1294 	uresp->qp_id = qp->qp_id;
1295 
1296 	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1297 	if (rc)
1298 		DP_ERR(dev,
1299 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1300 		       qp->icid);
1301 
1302 	return rc;
1303 }
1304 
1305 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1306 				      struct qedr_qp *qp,
1307 				      struct qedr_pd *pd,
1308 				      struct ib_qp_init_attr *attrs)
1309 {
1310 	spin_lock_init(&qp->q_lock);
1311 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1312 		kref_init(&qp->refcnt);
1313 		init_completion(&qp->iwarp_cm_comp);
1314 	}
1315 	qp->pd = pd;
1316 	qp->qp_type = attrs->qp_type;
1317 	qp->max_inline_data = attrs->cap.max_inline_data;
1318 	qp->sq.max_sges = attrs->cap.max_send_sge;
1319 	qp->state = QED_ROCE_QP_STATE_RESET;
1320 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1321 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1322 	qp->dev = dev;
1323 
1324 	if (attrs->srq) {
1325 		qp->srq = get_qedr_srq(attrs->srq);
1326 	} else {
1327 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1328 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1329 		DP_DEBUG(dev, QEDR_MSG_QP,
1330 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1331 			 qp->rq.max_sges, qp->rq_cq->icid);
1332 	}
1333 
1334 	DP_DEBUG(dev, QEDR_MSG_QP,
1335 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1336 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1337 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1338 	DP_DEBUG(dev, QEDR_MSG_QP,
1339 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1340 		 qp->sq.max_sges, qp->sq_cq->icid);
1341 }
1342 
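/*
 * Set up the kernel-QP RoCE doorbell addresses (SQ and, when no SRQ is used,
 * RQ) and register them for doorbell recovery.
 */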
1343 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1344 {
1345 	int rc;
1346 
1347 	qp->sq.db = dev->db_addr +
1348 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1349 	qp->sq.db_data.data.icid = qp->icid + 1;
1350 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1351 				  &qp->sq.db_data,
1352 				  DB_REC_WIDTH_32B,
1353 				  DB_REC_KERNEL);
1354 	if (rc)
1355 		return rc;
1356 
1357 	if (!qp->srq) {
1358 		qp->rq.db = dev->db_addr +
1359 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1360 		qp->rq.db_data.data.icid = qp->icid;
1361 
1362 		rc = qedr_db_recovery_add(dev, qp->rq.db,
1363 					  &qp->rq.db_data,
1364 					  DB_REC_WIDTH_32B,
1365 					  DB_REC_KERNEL);
1366 		if (rc)
1367 			qedr_db_recovery_del(dev, qp->sq.db,
1368 					     &qp->sq.db_data);
1369 	}
1370 
1371 	return rc;
1372 }
1373 
1374 static int qedr_check_srq_params(struct qedr_dev *dev,
1375 				 struct ib_srq_init_attr *attrs,
1376 				 struct ib_udata *udata)
1377 {
1378 	struct qedr_device_attr *qattr = &dev->attr;
1379 
1380 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1381 		DP_ERR(dev,
1382 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1383 		       attrs->attr.max_wr, qattr->max_srq_wr);
1384 		return -EINVAL;
1385 	}
1386 
1387 	if (attrs->attr.max_sge > qattr->max_sge) {
1388 		DP_ERR(dev,
1389 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1390 		       attrs->attr.max_sge, qattr->max_sge);
1391 		return -EINVAL;
1392 	}
1393 
1394 	return 0;
1395 }
1396 
1397 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1398 {
1399 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1400 	ib_umem_release(srq->usrq.umem);
1401 	ib_umem_release(srq->prod_umem);
1402 }
1403 
1404 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1405 {
1406 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1407 	struct qedr_dev *dev = srq->dev;
1408 
1409 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1410 
1411 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1412 			  hw_srq->virt_prod_pair_addr,
1413 			  hw_srq->phy_prod_pair_addr);
1414 }
1415 
1416 static int qedr_init_srq_user_params(struct ib_udata *udata,
1417 				     struct qedr_srq *srq,
1418 				     struct qedr_create_srq_ureq *ureq,
1419 				     int access)
1420 {
1421 	struct scatterlist *sg;
1422 	int rc;
1423 
1424 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1425 				  ureq->srq_len, false, access, 1);
1426 	if (rc)
1427 		return rc;
1428 
1429 	srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
1430 				     sizeof(struct rdma_srq_producers), access);
1431 	if (IS_ERR(srq->prod_umem)) {
1432 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1433 		ib_umem_release(srq->usrq.umem);
1434 		DP_ERR(srq->dev,
1435 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1436 		       PTR_ERR(srq->prod_umem));
1437 		return PTR_ERR(srq->prod_umem);
1438 	}
1439 
1440 	sg = srq->prod_umem->sg_head.sgl;
1441 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1442 
1443 	return 0;
1444 }
1445 
1446 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1447 					struct qedr_dev *dev,
1448 					struct ib_srq_init_attr *init_attr)
1449 {
1450 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1451 	struct qed_chain_init_params params = {
1452 		.mode		= QED_CHAIN_MODE_PBL,
1453 		.intended_use	= QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1454 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1455 		.elem_size	= QEDR_SRQ_WQE_ELEM_SIZE,
1456 	};
1457 	dma_addr_t phy_prod_pair_addr;
1458 	u32 num_elems;
1459 	void *va;
1460 	int rc;
1461 
1462 	va = dma_alloc_coherent(&dev->pdev->dev,
1463 				sizeof(struct rdma_srq_producers),
1464 				&phy_prod_pair_addr, GFP_KERNEL);
1465 	if (!va) {
1466 		DP_ERR(dev,
1467 		       "create srq: failed to allocate dma memory for producer\n");
1468 		return -ENOMEM;
1469 	}
1470 
1471 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1472 	hw_srq->virt_prod_pair_addr = va;
1473 
1474 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1475 	params.num_elems = num_elems;
1476 
1477 	rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
1478 	if (rc)
1479 		goto err0;
1480 
1481 	hw_srq->num_elems = num_elems;
1482 
1483 	return 0;
1484 
1485 err0:
1486 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1487 			  va, phy_prod_pair_addr);
1488 	return rc;
1489 }
1490 
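/*
 * Create an SRQ: build the receive ring (user PBL or kernel qed chain) and a
 * producer-pair page, create the SRQ in the firmware and store it in the
 * dev->srqs xarray so it can later be looked up by ID.
 */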
1491 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1492 		    struct ib_udata *udata)
1493 {
1494 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1495 	struct qed_rdma_create_srq_in_params in_params = {};
1496 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1497 	struct qed_rdma_create_srq_out_params out_params;
1498 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1499 	struct qedr_create_srq_ureq ureq = {};
1500 	u64 pbl_base_addr, phy_prod_pair_addr;
1501 	struct qedr_srq_hwq_info *hw_srq;
1502 	u32 page_cnt, page_size;
1503 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1504 	int rc = 0;
1505 
1506 	DP_DEBUG(dev, QEDR_MSG_QP,
1507 		 "create SRQ called from %s (pd %p)\n",
1508 		 (udata) ? "User lib" : "kernel", pd);
1509 
1510 	rc = qedr_check_srq_params(dev, init_attr, udata);
1511 	if (rc)
1512 		return -EINVAL;
1513 
1514 	srq->dev = dev;
1515 	hw_srq = &srq->hw_srq;
1516 	spin_lock_init(&srq->lock);
1517 
1518 	hw_srq->max_wr = init_attr->attr.max_wr;
1519 	hw_srq->max_sges = init_attr->attr.max_sge;
1520 
1521 	if (udata) {
1522 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1523 							 udata->inlen))) {
1524 			DP_ERR(dev,
1525 			       "create srq: problem copying data from user space\n");
1526 			goto err0;
1527 		}
1528 
1529 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
1530 		if (rc)
1531 			goto err0;
1532 
1533 		page_cnt = srq->usrq.pbl_info.num_pbes;
1534 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1535 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1536 		page_size = PAGE_SIZE;
1537 	} else {
1538 		struct qed_chain *pbl;
1539 
1540 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1541 		if (rc)
1542 			goto err0;
1543 
1544 		pbl = &hw_srq->pbl;
1545 		page_cnt = qed_chain_get_page_cnt(pbl);
1546 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1547 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1548 		page_size = QED_CHAIN_PAGE_SIZE;
1549 	}
1550 
1551 	in_params.pd_id = pd->pd_id;
1552 	in_params.pbl_base_addr = pbl_base_addr;
1553 	in_params.prod_pair_addr = phy_prod_pair_addr;
1554 	in_params.num_pages = page_cnt;
1555 	in_params.page_size = page_size;
1556 
1557 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1558 	if (rc)
1559 		goto err1;
1560 
1561 	srq->srq_id = out_params.srq_id;
1562 
1563 	if (udata) {
1564 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1565 		if (rc)
1566 			goto err2;
1567 	}
1568 
1569 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1570 	if (rc)
1571 		goto err2;
1572 
1573 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1574 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1575 	return 0;
1576 
1577 err2:
1578 	destroy_in_params.srq_id = srq->srq_id;
1579 
1580 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1581 err1:
1582 	if (udata)
1583 		qedr_free_srq_user_params(srq);
1584 	else
1585 		qedr_free_srq_kernel_params(srq);
1586 err0:
1587 	return -EFAULT;
1588 }
1589 
1590 void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1591 {
1592 	struct qed_rdma_destroy_srq_in_params in_params = {};
1593 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1594 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1595 
1596 	xa_erase_irq(&dev->srqs, srq->srq_id);
1597 	in_params.srq_id = srq->srq_id;
1598 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1599 
1600 	if (ibsrq->uobject)
1601 		qedr_free_srq_user_params(srq);
1602 	else
1603 		qedr_free_srq_kernel_params(srq);
1604 
1605 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1606 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1607 		 srq->srq_id);
1608 }
1609 
1610 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1611 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1612 {
1613 	struct qed_rdma_modify_srq_in_params in_params = {};
1614 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1615 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1616 	int rc;
1617 
1618 	if (attr_mask & IB_SRQ_MAX_WR) {
1619 		DP_ERR(dev,
1620 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1621 		       attr_mask, srq);
1622 		return -EINVAL;
1623 	}
1624 
1625 	if (attr_mask & IB_SRQ_LIMIT) {
1626 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1627 			DP_ERR(dev,
1628 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1629 			       attr->srq_limit, srq->hw_srq.max_wr);
1630 			return -EINVAL;
1631 		}
1632 
1633 		in_params.srq_id = srq->srq_id;
1634 		in_params.wqe_limit = attr->srq_limit;
1635 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1636 		if (rc)
1637 			return rc;
1638 	}
1639 
1640 	srq->srq_limit = attr->srq_limit;
1641 
1642 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1643 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1644 
1645 	return 0;
1646 }
1647 
1648 static inline void
1649 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1650 			      struct qedr_pd *pd,
1651 			      struct qedr_qp *qp,
1652 			      struct ib_qp_init_attr *attrs,
1653 			      bool fmr_and_reserved_lkey,
1654 			      struct qed_rdma_create_qp_in_params *params)
1655 {
1656 	/* QP handle to be written in an async event */
1657 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1658 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1659 
1660 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1661 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1662 	params->pd = pd->pd_id;
1663 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1664 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1665 	params->stats_queue = 0;
1666 	params->srq_id = 0;
1667 	params->use_srq = false;
1668 
1669 	if (!qp->srq) {
1670 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1671 
1672 	} else {
1673 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1674 		params->srq_id = qp->srq->srq_id;
1675 		params->use_srq = true;
1676 	}
1677 }
1678 
1679 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1680 {
1681 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1682 		 "qp=%p. "
1683 		 "sq_addr=0x%llx, "
1684 		 "sq_len=%zd, "
1685 		 "rq_addr=0x%llx, "
1686 		 "rq_len=%zd"
1687 		 "\n",
1688 		 qp,
1689 		 qp->usq.buf_addr,
1690 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1691 }
1692 
1693 static inline void
1694 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1695 			    struct qedr_qp *qp,
1696 			    struct qed_rdma_create_qp_out_params *out_params)
1697 {
1698 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1699 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1700 
1701 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1702 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1703 	if (!qp->srq) {
1704 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1705 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1706 	}
1707 
1708 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1709 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1710 }
1711 
1712 static void qedr_cleanup_user(struct qedr_dev *dev,
1713 			      struct qedr_ucontext *ctx,
1714 			      struct qedr_qp *qp)
1715 {
1716 	ib_umem_release(qp->usq.umem);
1717 	qp->usq.umem = NULL;
1718 
1719 	ib_umem_release(qp->urq.umem);
1720 	qp->urq.umem = NULL;
1721 
1722 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1723 		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1724 		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1725 	} else {
1726 		kfree(qp->usq.pbl_tbl);
1727 		kfree(qp->urq.pbl_tbl);
1728 	}
1729 
1730 	if (qp->usq.db_rec_data) {
1731 		qedr_db_recovery_del(dev, qp->usq.db_addr,
1732 				     &qp->usq.db_rec_data->db_data);
1733 		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1734 	}
1735 
1736 	if (qp->urq.db_rec_data) {
1737 		qedr_db_recovery_del(dev, qp->urq.db_addr,
1738 				     &qp->urq.db_rec_data->db_data);
1739 		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
1740 	}
1741 
1742 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1743 		qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
1744 				     &qp->urq.db_rec_db2_data);
1745 }
1746 
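/*
 * Create a QP whose SQ/RQ buffers were allocated by user space: pin the
 * buffers, create the QP in the firmware, return the doorbell offsets in the
 * response and register the user doorbell-recovery entries.
 */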
1747 static int qedr_create_user_qp(struct qedr_dev *dev,
1748 			       struct qedr_qp *qp,
1749 			       struct ib_pd *ibpd,
1750 			       struct ib_udata *udata,
1751 			       struct ib_qp_init_attr *attrs)
1752 {
1753 	struct qed_rdma_create_qp_in_params in_params;
1754 	struct qed_rdma_create_qp_out_params out_params;
1755 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1756 	struct qedr_create_qp_uresp uresp;
1757 	struct qedr_ucontext *ctx = NULL;
1758 	struct qedr_create_qp_ureq ureq;
1759 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1760 	int rc = -EINVAL;
1761 
1762 	qp->create_type = QEDR_QP_CREATE_USER;
1763 	memset(&ureq, 0, sizeof(ureq));
1764 	rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
1765 	if (rc) {
1766 		DP_ERR(dev, "Problem copying data from user space\n");
1767 		return rc;
1768 	}
1769 
1770 	/* SQ - read access only (0) */
1771 	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1772 				  ureq.sq_len, true, 0, alloc_and_init);
1773 	if (rc)
1774 		return rc;
1775 
1776 	if (!qp->srq) {
1777 		/* RQ - read access only (0) */
1778 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1779 					  ureq.rq_len, true, 0, alloc_and_init);
1780 		if (rc)
1781 			return rc;
1782 	}
1783 
1784 	memset(&in_params, 0, sizeof(in_params));
1785 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1786 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1787 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1788 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1789 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1790 	if (!qp->srq) {
1791 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1792 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1793 	}
1794 
1795 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1796 					      &in_params, &out_params);
1797 
1798 	if (!qp->qed_qp) {
1799 		rc = -ENOMEM;
1800 		goto err1;
1801 	}
1802 
1803 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1804 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1805 
1806 	qp->qp_id = out_params.qp_id;
1807 	qp->icid = out_params.icid;
1808 
1809 	rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1810 	if (rc)
1811 		goto err;
1812 
1813 	/* db offset was calculated in copy_qp_uresp, now set in the user q */
1814 	ctx = pd->uctx;
1815 	qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1816 	qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1817 
1818 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1819 		qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
1820 
1821 		/* Calculate the db_rec_db2 data here since it is constant, so
1822 		 * there is no need to receive it from user space.
1823 		 */
1824 		qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
1825 		qp->urq.db_rec_db2_data.data.value =
1826 			cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
1827 	}
1828 
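	/* Register the user SQ/RQ doorbells (and, for iWARP, db2 below) with
	 * the doorbell recovery mechanism so they can be replayed if the
	 * device signals a doorbell drop event.
	 */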
1829 	rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1830 				  &qp->usq.db_rec_data->db_data,
1831 				  DB_REC_WIDTH_32B,
1832 				  DB_REC_USER);
1833 	if (rc)
1834 		goto err;
1835 
1836 	rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1837 				  &qp->urq.db_rec_data->db_data,
1838 				  DB_REC_WIDTH_32B,
1839 				  DB_REC_USER);
1840 	if (rc)
1841 		goto err;
1842 
1843 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1844 		rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1845 					  &qp->urq.db_rec_db2_data,
1846 					  DB_REC_WIDTH_32B,
1847 					  DB_REC_USER);
1848 		if (rc)
1849 			goto err;
1850 	}
1851 	qedr_qp_user_print(dev, qp);
1852 
1853 	return rc;
1854 err:
1855 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1856 	if (rc)
1857 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1858 
1859 err1:
1860 	qedr_cleanup_user(dev, ctx, qp);
1861 	return rc;
1862 }
1863 
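/* iWARP kernel QPs use three doorbells: the XCM SQ producer, the TCM RQ
 * producer and a second TCM "flags" doorbell (iwarp_db2).  All three are
 * registered with the doorbell recovery mechanism below.
 */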
1864 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1865 {
1866 	int rc;
1867 
1868 	qp->sq.db = dev->db_addr +
1869 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1870 	qp->sq.db_data.data.icid = qp->icid;
1871 
1872 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1873 				  &qp->sq.db_data,
1874 				  DB_REC_WIDTH_32B,
1875 				  DB_REC_KERNEL);
1876 	if (rc)
1877 		return rc;
1878 
1879 	qp->rq.db = dev->db_addr +
1880 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1881 	qp->rq.db_data.data.icid = qp->icid;
1882 	qp->rq.iwarp_db2 = dev->db_addr +
1883 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1884 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1885 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1886 
1887 	rc = qedr_db_recovery_add(dev, qp->rq.db,
1888 				  &qp->rq.db_data,
1889 				  DB_REC_WIDTH_32B,
1890 				  DB_REC_KERNEL);
1891 	if (rc)
1892 		return rc;
1893 
1894 	rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
1895 				  &qp->rq.iwarp_db2_data,
1896 				  DB_REC_WIDTH_32B,
1897 				  DB_REC_KERNEL);
1898 	return rc;
1899 }
1900 
1901 static int
1902 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1903 			   struct qedr_qp *qp,
1904 			   struct qed_rdma_create_qp_in_params *in_params,
1905 			   u32 n_sq_elems, u32 n_rq_elems)
1906 {
1907 	struct qed_rdma_create_qp_out_params out_params;
1908 	struct qed_chain_init_params params = {
1909 		.mode		= QED_CHAIN_MODE_PBL,
1910 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1911 	};
1912 	int rc;
1913 
1914 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
1915 	params.num_elems = n_sq_elems;
1916 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
1917 
1918 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
1919 	if (rc)
1920 		return rc;
1921 
1922 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1923 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1924 
1925 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
1926 	params.elem_size = n_rq_elems;
1927 	params.num_elems = n_rq_elems;
1928 
1929 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
1930 	if (rc)
1931 		return rc;
1932 
1933 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1934 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1935 
1936 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1937 					      in_params, &out_params);
1938 
1939 	if (!qp->qed_qp)
1940 		return -EINVAL;
1941 
1942 	qp->qp_id = out_params.qp_id;
1943 	qp->icid = out_params.icid;
1944 
1945 	return qedr_set_roce_db_info(dev, qp);
1946 }
1947 
1948 static int
1949 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1950 			    struct qedr_qp *qp,
1951 			    struct qed_rdma_create_qp_in_params *in_params,
1952 			    u32 n_sq_elems, u32 n_rq_elems)
1953 {
1954 	struct qed_rdma_create_qp_out_params out_params;
1955 	struct qed_chain_init_params params = {
1956 		.mode		= QED_CHAIN_MODE_PBL,
1957 		.cnt_type	= QED_CHAIN_CNT_TYPE_U32,
1958 	};
1959 	int rc;
1960 
1961 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1962 						     QEDR_SQE_ELEMENT_SIZE,
1963 						     QED_CHAIN_PAGE_SIZE,
1964 						     QED_CHAIN_MODE_PBL);
1965 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1966 						     QEDR_RQE_ELEMENT_SIZE,
1967 						     QED_CHAIN_PAGE_SIZE,
1968 						     QED_CHAIN_MODE_PBL);
1969 
1970 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1971 					      in_params, &out_params);
1972 
1973 	if (!qp->qed_qp)
1974 		return -EINVAL;
1975 
1976 	/* Now allocate the chains, building them on the external PBLs that the
	 * qed QP creation returned in out_params (the reverse order of the RoCE
	 * flow above).
	 */
1977 
1978 	params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
1979 	params.num_elems = n_sq_elems;
1980 	params.elem_size = QEDR_SQE_ELEMENT_SIZE;
1981 	params.ext_pbl_virt = out_params.sq_pbl_virt;
1982 	params.ext_pbl_phys = out_params.sq_pbl_phys;
1983 
1984 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
1985 	if (rc)
1986 		goto err;
1987 
1988 	params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
1989 	params.num_elems = n_rq_elems;
1990 	params.elem_size = QEDR_RQE_ELEMENT_SIZE;
1991 	params.ext_pbl_virt = out_params.rq_pbl_virt;
1992 	params.ext_pbl_phys = out_params.rq_pbl_phys;
1993 
1994 	rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
1995 	if (rc)
1996 		goto err;
1997 
1998 	qp->qp_id = out_params.qp_id;
1999 	qp->icid = out_params.icid;
2000 
2001 	return qedr_set_iwarp_db_info(dev, qp);
2002 
2003 err:
2004 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2005 
2006 	return rc;
2007 }
2008 
2009 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2010 {
2011 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2012 	kfree(qp->wqe_wr_id);
2013 
2014 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2015 	kfree(qp->rqe_wr_id);
2016 
2017 	/* The GSI QP is not registered with the doorbell recovery mechanism,
	 * so there is nothing to delete for it.
	 */
2018 	if (qp->qp_type == IB_QPT_GSI)
2019 		return;
2020 
2021 	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2022 
2023 	if (!qp->srq) {
2024 		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2025 
2026 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2027 			qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2028 					     &qp->rq.iwarp_db2_data);
2029 	}
2030 }
2031 
2032 static int qedr_create_kernel_qp(struct qedr_dev *dev,
2033 				 struct qedr_qp *qp,
2034 				 struct ib_pd *ibpd,
2035 				 struct ib_qp_init_attr *attrs)
2036 {
2037 	struct qed_rdma_create_qp_in_params in_params;
2038 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2039 	int rc = -EINVAL;
2040 	u32 n_rq_elems;
2041 	u32 n_sq_elems;
2042 	u32 n_sq_entries;
2043 
2044 	memset(&in_params, 0, sizeof(in_params));
2045 	qp->create_type = QEDR_QP_CREATE_KERNEL;
2046 
2047 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2048 	 * the ring. The ring should allow at least a single WR, even if the
2049 	 * user requested none, due to allocation issues.
2050 	 * We should add an extra WR since the prod and cons indices of
2051 	 * wqe_wr_id are managed in such a way that the WQ is considered full
2052 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
2053 	 * double the number of entries due to an iSER issue that pushes far
2054 	 * more WRs than indicated. If we declined its ib_post_send() we would
2055 	 * get error prints in dmesg that we'd like to avoid.
2056 	 */
2057 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2058 			      dev->attr.max_sqe);
2059 
2060 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2061 				GFP_KERNEL);
2062 	if (!qp->wqe_wr_id) {
2063 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2064 		return -ENOMEM;
2065 	}
2066 
2067 	/* QP handle to be written in CQE */
2068 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2069 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2070 
2071 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2072 	 * the ring. The ring should allow at least a single WR, even if the
2073 	 * user requested none, due to allocation issues.
2074 	 */
2075 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2076 
2077 	/* Allocate driver internal RQ array */
2078 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2079 				GFP_KERNEL);
2080 	if (!qp->rqe_wr_id) {
2081 		DP_ERR(dev,
2082 		       "create qp: failed RQ shadow memory allocation\n");
2083 		kfree(qp->wqe_wr_id);
2084 		return -ENOMEM;
2085 	}
2086 
2087 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2088 
2089 	n_sq_entries = attrs->cap.max_send_wr;
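	/* Size the rings for the worst case: a single WR may consume up to
	 * QEDR_MAX_SQE_ELEMENTS_PER_SQE (resp. QEDR_MAX_RQE_ELEMENTS_PER_RQE)
	 * chain elements.
	 */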
2090 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2091 	n_sq_entries = max_t(u32, n_sq_entries, 1);
2092 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2093 
2094 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2095 
2096 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2097 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2098 						 n_sq_elems, n_rq_elems);
2099 	else
2100 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2101 						n_sq_elems, n_rq_elems);
2102 	if (rc)
2103 		qedr_cleanup_kernel(dev, qp);
2104 
2105 	return rc;
2106 }
2107 
2108 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
2109 			     struct ib_qp_init_attr *attrs,
2110 			     struct ib_udata *udata)
2111 {
2112 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2113 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2114 	struct qedr_qp *qp;
2115 	struct ib_qp *ibqp;
2116 	int rc = 0;
2117 
2118 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2119 		 udata ? "user library" : "kernel", pd);
2120 
2121 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2122 	if (rc)
2123 		return ERR_PTR(rc);
2124 
2125 	DP_DEBUG(dev, QEDR_MSG_QP,
2126 		 "create qp: called from %s, event_handler=%p, pd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2127 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2128 		 get_qedr_cq(attrs->send_cq),
2129 		 get_qedr_cq(attrs->send_cq)->icid,
2130 		 get_qedr_cq(attrs->recv_cq),
2131 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2132 
2133 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2134 	if (!qp) {
2135 		DP_ERR(dev, "create qp: failed allocating memory\n");
2136 		return ERR_PTR(-ENOMEM);
2137 	}
2138 
2139 	qedr_set_common_qp_params(dev, qp, pd, attrs);
2140 
2141 	if (attrs->qp_type == IB_QPT_GSI) {
2142 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
2143 		if (IS_ERR(ibqp))
2144 			kfree(qp);
2145 		return ibqp;
2146 	}
2147 
2148 	if (udata)
2149 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2150 	else
2151 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2152 
2153 	if (rc)
2154 		goto err;
2155 
2156 	qp->ibqp.qp_num = qp->qp_id;
2157 
2158 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2159 		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2160 		if (rc)
2161 			goto err;
2162 	}
2163 
2164 	return &qp->ibqp;
2165 
2166 err:
2167 	kfree(qp);
2168 
2169 	return ERR_PTR(-EFAULT);
2170 }
2171 
2172 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2173 {
2174 	switch (qp_state) {
2175 	case QED_ROCE_QP_STATE_RESET:
2176 		return IB_QPS_RESET;
2177 	case QED_ROCE_QP_STATE_INIT:
2178 		return IB_QPS_INIT;
2179 	case QED_ROCE_QP_STATE_RTR:
2180 		return IB_QPS_RTR;
2181 	case QED_ROCE_QP_STATE_RTS:
2182 		return IB_QPS_RTS;
2183 	case QED_ROCE_QP_STATE_SQD:
2184 		return IB_QPS_SQD;
2185 	case QED_ROCE_QP_STATE_ERR:
2186 		return IB_QPS_ERR;
2187 	case QED_ROCE_QP_STATE_SQE:
2188 		return IB_QPS_SQE;
2189 	}
2190 	return IB_QPS_ERR;
2191 }
2192 
2193 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2194 					enum ib_qp_state qp_state)
2195 {
2196 	switch (qp_state) {
2197 	case IB_QPS_RESET:
2198 		return QED_ROCE_QP_STATE_RESET;
2199 	case IB_QPS_INIT:
2200 		return QED_ROCE_QP_STATE_INIT;
2201 	case IB_QPS_RTR:
2202 		return QED_ROCE_QP_STATE_RTR;
2203 	case IB_QPS_RTS:
2204 		return QED_ROCE_QP_STATE_RTS;
2205 	case IB_QPS_SQD:
2206 		return QED_ROCE_QP_STATE_SQD;
2207 	case IB_QPS_ERR:
2208 		return QED_ROCE_QP_STATE_ERR;
2209 	default:
2210 		return QED_ROCE_QP_STATE_ERR;
2211 	}
2212 }
2213 
2214 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2215 {
2216 	qed_chain_reset(&qph->pbl);
2217 	qph->prod = 0;
2218 	qph->cons = 0;
2219 	qph->wqe_cons = 0;
2220 	qph->db_data.data.value = cpu_to_le16(0);
2221 }
2222 
2223 static int qedr_update_qp_state(struct qedr_dev *dev,
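/* Validate a RESET->INIT->RTR->RTS(->SQD)->ERR style transition and perform
 * the small amount of SW-side work some transitions need (e.g. ringing the RQ
 * doorbell on INIT->RTR for RoCE); disallowed transitions return -EINVAL.
 */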
2224 				struct qedr_qp *qp,
2225 				enum qed_roce_qp_state cur_state,
2226 				enum qed_roce_qp_state new_state)
2227 {
2228 	int status = 0;
2229 
2230 	if (new_state == cur_state)
2231 		return 0;
2232 
2233 	switch (cur_state) {
2234 	case QED_ROCE_QP_STATE_RESET:
2235 		switch (new_state) {
2236 		case QED_ROCE_QP_STATE_INIT:
2237 			qp->prev_wqe_size = 0;
2238 			qedr_reset_qp_hwq_info(&qp->sq);
2239 			qedr_reset_qp_hwq_info(&qp->rq);
2240 			break;
2241 		default:
2242 			status = -EINVAL;
2243 			break;
2244 		}
2245 		break;
2246 	case QED_ROCE_QP_STATE_INIT:
2247 		switch (new_state) {
2248 		case QED_ROCE_QP_STATE_RTR:
2249 			/* Update doorbell (in case post_recv was
2250 			 * done before move to RTR)
2251 			 */
2252 
2253 			if (rdma_protocol_roce(&dev->ibdev, 1))
2254 				writel(qp->rq.db_data.raw, qp->rq.db);
2256 			break;
2257 		case QED_ROCE_QP_STATE_ERR:
2258 			break;
2259 		default:
2260 			/* Invalid state change. */
2261 			status = -EINVAL;
2262 			break;
2263 		}
2264 		break;
2265 	case QED_ROCE_QP_STATE_RTR:
2266 		/* RTR->XXX */
2267 		switch (new_state) {
2268 		case QED_ROCE_QP_STATE_RTS:
2269 			break;
2270 		case QED_ROCE_QP_STATE_ERR:
2271 			break;
2272 		default:
2273 			/* Invalid state change. */
2274 			status = -EINVAL;
2275 			break;
2276 		}
2277 		break;
2278 	case QED_ROCE_QP_STATE_RTS:
2279 		/* RTS->XXX */
2280 		switch (new_state) {
2281 		case QED_ROCE_QP_STATE_SQD:
2282 			break;
2283 		case QED_ROCE_QP_STATE_ERR:
2284 			break;
2285 		default:
2286 			/* Invalid state change. */
2287 			status = -EINVAL;
2288 			break;
2289 		}
2290 		break;
2291 	case QED_ROCE_QP_STATE_SQD:
2292 		/* SQD->XXX */
2293 		switch (new_state) {
2294 		case QED_ROCE_QP_STATE_RTS:
2295 		case QED_ROCE_QP_STATE_ERR:
2296 			break;
2297 		default:
2298 			/* Invalid state change. */
2299 			status = -EINVAL;
2300 			break;
2301 		}
2302 		break;
2303 	case QED_ROCE_QP_STATE_ERR:
2304 		/* ERR->XXX */
2305 		switch (new_state) {
2306 		case QED_ROCE_QP_STATE_RESET:
2307 			if ((qp->rq.prod != qp->rq.cons) ||
2308 			    (qp->sq.prod != qp->sq.cons)) {
2309 				DP_NOTICE(dev,
2310 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2311 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2312 					  qp->sq.cons);
2313 				status = -EINVAL;
2314 			}
2315 			break;
2316 		default:
2317 			status = -EINVAL;
2318 			break;
2319 		}
2320 		break;
2321 	default:
2322 		status = -EINVAL;
2323 		break;
2324 	}
2325 
2326 	return status;
2327 }
2328 
2329 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2330 		   int attr_mask, struct ib_udata *udata)
2331 {
2332 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2333 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2334 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2335 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2336 	enum ib_qp_state old_qp_state, new_qp_state;
2337 	enum qed_roce_qp_state cur_state;
2338 	int rc = 0;
2339 
2340 	DP_DEBUG(dev, QEDR_MSG_QP,
2341 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2342 		 attr->qp_state);
2343 
2344 	old_qp_state = qedr_get_ibqp_state(qp->state);
2345 	if (attr_mask & IB_QP_STATE)
2346 		new_qp_state = attr->qp_state;
2347 	else
2348 		new_qp_state = old_qp_state;
2349 
2350 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2351 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2352 					ibqp->qp_type, attr_mask)) {
2353 			DP_ERR(dev,
2354 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2355 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2356 			       attr_mask, qp->qp_id, ibqp->qp_type,
2357 			       old_qp_state, new_qp_state);
2358 			rc = -EINVAL;
2359 			goto err;
2360 		}
2361 	}
2362 
2363 	/* Translate the masks... */
2364 	if (attr_mask & IB_QP_STATE) {
2365 		SET_FIELD(qp_params.modify_flags,
2366 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2367 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2368 	}
2369 
2370 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2371 		qp_params.sqd_async = true;
2372 
2373 	if (attr_mask & IB_QP_PKEY_INDEX) {
2374 		SET_FIELD(qp_params.modify_flags,
2375 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2376 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2377 			rc = -EINVAL;
2378 			goto err;
2379 		}
2380 
2381 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2382 	}
2383 
2384 	if (attr_mask & IB_QP_QKEY)
2385 		qp->qkey = attr->qkey;
2386 
2387 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2388 		SET_FIELD(qp_params.modify_flags,
2389 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2390 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2391 						  IB_ACCESS_REMOTE_READ;
2392 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2393 						   IB_ACCESS_REMOTE_WRITE;
2394 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2395 					       IB_ACCESS_REMOTE_ATOMIC;
2396 	}
2397 
2398 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2399 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2400 			return -EINVAL;
2401 
2402 		if (attr_mask & IB_QP_PATH_MTU) {
2403 			if (attr->path_mtu < IB_MTU_256 ||
2404 			    attr->path_mtu > IB_MTU_4096) {
2405 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2406 				rc = -EINVAL;
2407 				goto err;
2408 			}
2409 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2410 				      ib_mtu_enum_to_int(iboe_get_mtu
2411 							 (dev->ndev->mtu)));
2412 		}
2413 
2414 		if (!qp->mtu) {
2415 			qp->mtu =
2416 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2417 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2418 		}
2419 
2420 		SET_FIELD(qp_params.modify_flags,
2421 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2422 
2423 		qp_params.traffic_class_tos = grh->traffic_class;
2424 		qp_params.flow_label = grh->flow_label;
2425 		qp_params.hop_limit_ttl = grh->hop_limit;
2426 
2427 		qp->sgid_idx = grh->sgid_index;
2428 
2429 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2430 		if (rc) {
2431 			DP_ERR(dev,
2432 			       "modify qp: problems with GID index %d (rc=%d)\n",
2433 			       grh->sgid_index, rc);
2434 			return rc;
2435 		}
2436 
2437 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2438 				   qp_params.remote_mac_addr);
2439 		if (rc)
2440 			return rc;
2441 
2442 		qp_params.use_local_mac = true;
2443 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2444 
2445 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2446 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2447 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2448 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2449 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2450 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2451 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2452 			 qp_params.remote_mac_addr);
2453 
2454 		qp_params.mtu = qp->mtu;
2455 		qp_params.lb_indication = false;
2456 	}
2457 
2458 	if (!qp_params.mtu) {
2459 		/* Stay with current MTU */
2460 		if (qp->mtu)
2461 			qp_params.mtu = qp->mtu;
2462 		else
2463 			qp_params.mtu =
2464 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2465 	}
2466 
2467 	if (attr_mask & IB_QP_TIMEOUT) {
2468 		SET_FIELD(qp_params.modify_flags,
2469 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2470 
2471 		/* The received timeout value is an exponent used like this:
2472 		 *    "12.7.34 LOCAL ACK TIMEOUT
2473 		 *    Value representing the transport (ACK) timeout for use by
2474 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2475 		 * The FW expects timeout in msec so we need to divide the usec
2476 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2477 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2478 		 * The value of zero means infinite so we use a 'max_t' to make
2479 		 * sure that sub 1 msec values will be configured as 1 msec.
2480 		 */
2481 		if (attr->timeout)
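		/* Worked example (plain arithmetic, not taken from the spec
		 * text above): a timeout value of 14 means
		 * 4.096 * 2^14 usec ~= 67 msec, and the approximation below
		 * yields 2^(14 - 8) = 64 msec.
		 */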
2482 			qp_params.ack_timeout =
2483 					1 << max_t(int, attr->timeout - 8, 0);
2484 		else
2485 			qp_params.ack_timeout = 0;
2486 	}
2487 
2488 	if (attr_mask & IB_QP_RETRY_CNT) {
2489 		SET_FIELD(qp_params.modify_flags,
2490 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2491 		qp_params.retry_cnt = attr->retry_cnt;
2492 	}
2493 
2494 	if (attr_mask & IB_QP_RNR_RETRY) {
2495 		SET_FIELD(qp_params.modify_flags,
2496 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2497 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2498 	}
2499 
2500 	if (attr_mask & IB_QP_RQ_PSN) {
2501 		SET_FIELD(qp_params.modify_flags,
2502 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2503 		qp_params.rq_psn = attr->rq_psn;
2504 		qp->rq_psn = attr->rq_psn;
2505 	}
2506 
2507 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2508 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2509 			rc = -EINVAL;
2510 			DP_ERR(dev,
2511 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2512 			       attr->max_rd_atomic,
2513 			       dev->attr.max_qp_req_rd_atomic_resc);
2514 			goto err;
2515 		}
2516 
2517 		SET_FIELD(qp_params.modify_flags,
2518 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2519 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2520 	}
2521 
2522 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2523 		SET_FIELD(qp_params.modify_flags,
2524 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2525 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2526 	}
2527 
2528 	if (attr_mask & IB_QP_SQ_PSN) {
2529 		SET_FIELD(qp_params.modify_flags,
2530 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2531 		qp_params.sq_psn = attr->sq_psn;
2532 		qp->sq_psn = attr->sq_psn;
2533 	}
2534 
2535 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2536 		if (attr->max_dest_rd_atomic >
2537 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2538 			DP_ERR(dev,
2539 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2540 			       attr->max_dest_rd_atomic,
2541 			       dev->attr.max_qp_resp_rd_atomic_resc);
2542 
2543 			rc = -EINVAL;
2544 			goto err;
2545 		}
2546 
2547 		SET_FIELD(qp_params.modify_flags,
2548 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2549 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2550 	}
2551 
2552 	if (attr_mask & IB_QP_DEST_QPN) {
2553 		SET_FIELD(qp_params.modify_flags,
2554 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2555 
2556 		qp_params.dest_qp = attr->dest_qp_num;
2557 		qp->dest_qp_num = attr->dest_qp_num;
2558 	}
2559 
2560 	cur_state = qp->state;
2561 
2562 	/* Update the QP state before the actual ramrod to prevent a race with
2563 	 * fast path. Modifying the QP state to error will cause the device to
2564 	 * flush the CQEs, and polling those flushed CQEs would be treated as
2565 	 * a problem if the QP were not already in the error state.
2566 	 */
2567 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2568 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2569 		qp->state = QED_ROCE_QP_STATE_ERR;
2570 
2571 	if (qp->qp_type != IB_QPT_GSI)
2572 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2573 					      qp->qed_qp, &qp_params);
2574 
2575 	if (attr_mask & IB_QP_STATE) {
2576 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2577 			rc = qedr_update_qp_state(dev, qp, cur_state,
2578 						  qp_params.new_state);
2579 		qp->state = qp_params.new_state;
2580 	}
2581 
2582 err:
2583 	return rc;
2584 }
2585 
2586 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2587 {
2588 	int ib_qp_acc_flags = 0;
2589 
2590 	if (params->incoming_rdma_write_en)
2591 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2592 	if (params->incoming_rdma_read_en)
2593 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2594 	if (params->incoming_atomic_en)
2595 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2596 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2597 	return ib_qp_acc_flags;
2598 }
2599 
2600 int qedr_query_qp(struct ib_qp *ibqp,
2601 		  struct ib_qp_attr *qp_attr,
2602 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2603 {
2604 	struct qed_rdma_query_qp_out_params params;
2605 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2606 	struct qedr_dev *dev = qp->dev;
2607 	int rc = 0;
2608 
2609 	memset(&params, 0, sizeof(params));
2610 
2611 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2612 	if (rc)
2613 		goto err;
2614 
2615 	memset(qp_attr, 0, sizeof(*qp_attr));
2616 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2617 
2618 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2619 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2620 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2621 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2622 	qp_attr->rq_psn = params.rq_psn;
2623 	qp_attr->sq_psn = params.sq_psn;
2624 	qp_attr->dest_qp_num = params.dest_qp;
2625 
2626 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2627 
2628 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2629 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2630 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2631 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2632 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2633 	qp_init_attr->cap = qp_attr->cap;
2634 
2635 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2636 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2637 			params.flow_label, qp->sgid_idx,
2638 			params.hop_limit_ttl, params.traffic_class_tos);
2639 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2640 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2641 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2642 	qp_attr->timeout = params.timeout;
2643 	qp_attr->rnr_retry = params.rnr_retry;
2644 	qp_attr->retry_cnt = params.retry_cnt;
2645 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2646 	qp_attr->pkey_index = params.pkey_index;
2647 	qp_attr->port_num = 1;
2648 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2649 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2650 	qp_attr->alt_pkey_index = 0;
2651 	qp_attr->alt_port_num = 0;
2652 	qp_attr->alt_timeout = 0;
2653 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2654 
2655 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2656 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2657 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2658 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2659 
2660 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2661 		 qp_attr->cap.max_inline_data);
2662 
2663 err:
2664 	return rc;
2665 }
2666 
2667 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2668 				  struct ib_udata *udata)
2669 {
2670 	struct qedr_ucontext *ctx =
2671 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2672 					  ibucontext);
2673 	int rc;
2674 
2675 	if (qp->qp_type != IB_QPT_GSI) {
2676 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2677 		if (rc)
2678 			return rc;
2679 	}
2680 
2681 	if (qp->create_type == QEDR_QP_CREATE_USER)
2682 		qedr_cleanup_user(dev, ctx, qp);
2683 	else
2684 		qedr_cleanup_kernel(dev, qp);
2685 
2686 	return 0;
2687 }
2688 
2689 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2690 {
2691 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2692 	struct qedr_dev *dev = qp->dev;
2693 	struct ib_qp_attr attr;
2694 	int attr_mask = 0;
2695 
2696 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2697 		 qp, qp->qp_type);
2698 
2699 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2700 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2701 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2702 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2703 
2704 			attr.qp_state = IB_QPS_ERR;
2705 			attr_mask |= IB_QP_STATE;
2706 
2707 			/* Change the QP state to ERROR */
2708 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2709 		}
2710 	} else {
2711 		/* If connection establishment started the WAIT_FOR_CONNECT
2712 		 * bit will be on and we need to wait for the establishment
2713 		 * to complete before destroying the qp.
2714 		 */
2715 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2716 				     &qp->iwarp_cm_flags))
2717 			wait_for_completion(&qp->iwarp_cm_comp);
2718 
2719 		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2720 		 * bit will be on, and we need to wait for the disconnect to
2721 		 * complete before continuing. We can use the same completion,
2722 		 * iwarp_cm_comp, since this is the only place that waits for
2723 		 * this completion and it is sequential. In addition,
2724 		 * disconnect can't occur before the connection is fully
2725 		 * established, therefore if WAIT_FOR_DISCONNECT is on it
2726 		 * means WAIT_FOR_CONNECT is also on and the completion for
2727 		 * CONNECT already occurred.
2728 		 */
2729 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2730 				     &qp->iwarp_cm_flags))
2731 			wait_for_completion(&qp->iwarp_cm_comp);
2732 	}
2733 
2734 	if (qp->qp_type == IB_QPT_GSI)
2735 		qedr_destroy_gsi_qp(dev);
2736 
2737 	/* We need to remove the entry from the xarray before we release the
2738 	 * qp_id to avoid a race of the qp_id being reallocated and failing
2739 	 * on xa_insert
2740 	 */
2741 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2742 		xa_erase(&dev->qps, qp->qp_id);
2743 
2744 	qedr_free_qp_resources(dev, qp, udata);
2745 
2746 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2747 		qedr_iw_qp_rem_ref(&qp->ibqp);
2748 
2749 	return 0;
2750 }
2751 
2752 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2753 		   struct ib_udata *udata)
2754 {
2755 	struct qedr_ah *ah = get_qedr_ah(ibah);
2756 
2757 	rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
2758 
2759 	return 0;
2760 }
2761 
2762 void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2763 {
2764 	struct qedr_ah *ah = get_qedr_ah(ibah);
2765 
2766 	rdma_destroy_ah_attr(&ah->attr);
2767 }
2768 
2769 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2770 {
2771 	struct qedr_pbl *pbl, *tmp;
2772 
2773 	if (info->pbl_table)
2774 		list_add_tail(&info->pbl_table->list_entry,
2775 			      &info->free_pbl_list);
2776 
2777 	if (!list_empty(&info->inuse_pbl_list))
2778 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2779 
2780 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2781 		list_del(&pbl->list_entry);
2782 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2783 	}
2784 }
2785 
2786 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2787 			size_t page_list_len, bool two_layered)
2788 {
2789 	struct qedr_pbl *tmp;
2790 	int rc;
2791 
2792 	INIT_LIST_HEAD(&info->free_pbl_list);
2793 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2794 
2795 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2796 				  page_list_len, two_layered);
2797 	if (rc)
2798 		goto done;
2799 
2800 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2801 	if (IS_ERR(info->pbl_table)) {
2802 		rc = PTR_ERR(info->pbl_table);
2803 		goto done;
2804 	}
2805 
2806 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2807 		 &info->pbl_table->pa);
2808 
2809 	/* In the usual case we use 2 PBLs, so we add one to the free
2810 	 * list and allocate another one.
2811 	 */
2812 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2813 	if (IS_ERR(tmp)) {
2814 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2815 		goto done;
2816 	}
2817 
2818 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2819 
2820 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2821 
2822 done:
2823 	if (rc)
2824 		free_mr_info(dev, info);
2825 
2826 	return rc;
2827 }
2828 
2829 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2830 			       u64 usr_addr, int acc, struct ib_udata *udata)
2831 {
2832 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2833 	struct qedr_mr *mr;
2834 	struct qedr_pd *pd;
2835 	int rc = -ENOMEM;
2836 
2837 	pd = get_qedr_pd(ibpd);
2838 	DP_DEBUG(dev, QEDR_MSG_MR,
2839 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2840 		 pd->pd_id, start, len, usr_addr, acc);
2841 
2842 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2843 		return ERR_PTR(-EINVAL);
2844 
2845 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2846 	if (!mr)
2847 		return ERR_PTR(rc);
2848 
2849 	mr->type = QEDR_MR_USER;
2850 
2851 	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2852 	if (IS_ERR(mr->umem)) {
2853 		rc = -EFAULT;
2854 		goto err0;
2855 	}
2856 
2857 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2858 	if (rc)
2859 		goto err1;
2860 
2861 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2862 			   &mr->info.pbl_info, PAGE_SHIFT);
2863 
2864 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2865 	if (rc) {
2866 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2867 		goto err1;
2868 	}
2869 
2870 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2871 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2872 	mr->hw_mr.key = 0;
2873 	mr->hw_mr.pd = pd->pd_id;
2874 	mr->hw_mr.local_read = 1;
2875 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2876 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2877 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2878 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2879 	mr->hw_mr.mw_bind = false;
2880 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2881 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2882 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2883 	mr->hw_mr.page_size_log = PAGE_SHIFT;
2884 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2885 	mr->hw_mr.length = len;
2886 	mr->hw_mr.vaddr = usr_addr;
2887 	mr->hw_mr.zbva = false;
2888 	mr->hw_mr.phy_mr = false;
2889 	mr->hw_mr.dma_mr = false;
2890 
2891 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2892 	if (rc) {
2893 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2894 		goto err2;
2895 	}
2896 
2897 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2898 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2899 	    mr->hw_mr.remote_atomic)
2900 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2901 
2902 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2903 		 mr->ibmr.lkey);
2904 	return &mr->ibmr;
2905 
2906 err2:
2907 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2908 err1:
2909 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2910 err0:
2911 	kfree(mr);
2912 	return ERR_PTR(rc);
2913 }
2914 
2915 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2916 {
2917 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2918 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2919 	int rc = 0;
2920 
2921 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2922 	if (rc)
2923 		return rc;
2924 
2925 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2926 
2927 	if (mr->type != QEDR_MR_DMA)
2928 		free_mr_info(dev, &mr->info);
2929 
2930 	/* The umem is only set for user-registered memory; ib_umem_release()
	 * safely ignores a NULL umem for the other MR types.
	 */
2931 	ib_umem_release(mr->umem);
2932 
2933 	kfree(mr);
2934 
2935 	return rc;
2936 }
2937 
2938 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2939 				       int max_page_list_len)
2940 {
2941 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2942 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2943 	struct qedr_mr *mr;
2944 	int rc = -ENOMEM;
2945 
2946 	DP_DEBUG(dev, QEDR_MSG_MR,
2947 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2948 		 max_page_list_len);
2949 
2950 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2951 	if (!mr)
2952 		return ERR_PTR(rc);
2953 
2954 	mr->dev = dev;
2955 	mr->type = QEDR_MR_FRMR;
2956 
2957 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2958 	if (rc)
2959 		goto err0;
2960 
2961 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2962 	if (rc) {
2963 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2964 		goto err0;
2965 	}
2966 
2967 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2968 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2969 	mr->hw_mr.key = 0;
2970 	mr->hw_mr.pd = pd->pd_id;
2971 	mr->hw_mr.local_read = 1;
2972 	mr->hw_mr.local_write = 0;
2973 	mr->hw_mr.remote_read = 0;
2974 	mr->hw_mr.remote_write = 0;
2975 	mr->hw_mr.remote_atomic = 0;
2976 	mr->hw_mr.mw_bind = false;
2977 	mr->hw_mr.pbl_ptr = 0;
2978 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2979 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2980 	mr->hw_mr.fbo = 0;
2981 	mr->hw_mr.length = 0;
2982 	mr->hw_mr.vaddr = 0;
2983 	mr->hw_mr.zbva = false;
2984 	mr->hw_mr.phy_mr = true;
2985 	mr->hw_mr.dma_mr = false;
2986 
2987 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2988 	if (rc) {
2989 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2990 		goto err1;
2991 	}
2992 
2993 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2994 	mr->ibmr.rkey = mr->ibmr.lkey;
2995 
2996 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2997 	return mr;
2998 
2999 err1:
3000 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3001 err0:
3002 	kfree(mr);
3003 	return ERR_PTR(rc);
3004 }
3005 
3006 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3007 			    u32 max_num_sg, struct ib_udata *udata)
3008 {
3009 	struct qedr_mr *mr;
3010 
3011 	if (mr_type != IB_MR_TYPE_MEM_REG)
3012 		return ERR_PTR(-EINVAL);
3013 
3014 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
3015 
3016 	if (IS_ERR(mr))
3017 		return ERR_PTR(-EINVAL);
3018 
3019 	return &mr->ibmr;
3020 }
3021 
3022 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3023 {
3024 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3025 	struct qedr_pbl *pbl_table;
3026 	struct regpair *pbe;
3027 	u32 pbes_in_page;
3028 
3029 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3030 		DP_ERR(mr->dev, "qedr_set_page failed, PBL is full (npages=%d)\n", mr->npages);
3031 		return -ENOMEM;
3032 	}
3033 
3034 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3035 		 mr->npages, addr);
3036 
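	/* Locate the PBL page holding this entry and the slot within it; each
	 * PBL page holds pbl_size / sizeof(u64) page-buffer entries.
	 */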
3037 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3038 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3039 	pbe = (struct regpair *)pbl_table->va;
3040 	pbe +=  mr->npages % pbes_in_page;
3041 	pbe->lo = cpu_to_le32((u32)addr);
3042 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3043 
3044 	mr->npages++;
3045 
3046 	return 0;
3047 }
3048 
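/* Recycle the PBLs of FMRs whose completions have already been observed.  The
 * "- 1" in the work computation below leaves the most recent completion
 * unhandled; this appears to be a deliberate safety margin, but that is an
 * inference from the code rather than documented behaviour.
 */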
3049 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3050 {
3051 	int work = info->completed - info->completed_handled - 1;
3052 
3053 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3054 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3055 		struct qedr_pbl *pbl;
3056 
3057 		/* Free all the page lists that can be freed (all the ones that
3058 		 * were invalidated), under the assumption that if an FMR
3059 		 * completed successfully then any invalidate operation posted
3060 		 * before it has completed as well.
3061 		 */
3062 		pbl = list_first_entry(&info->inuse_pbl_list,
3063 				       struct qedr_pbl, list_entry);
3064 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3065 		info->completed_handled++;
3066 	}
3067 }
3068 
3069 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3070 		   int sg_nents, unsigned int *sg_offset)
3071 {
3072 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3073 
3074 	mr->npages = 0;
3075 
3076 	handle_completed_mrs(mr->dev, &mr->info);
3077 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3078 }
3079 
3080 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3081 {
3082 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3083 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3084 	struct qedr_mr *mr;
3085 	int rc;
3086 
3087 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3088 	if (!mr)
3089 		return ERR_PTR(-ENOMEM);
3090 
3091 	mr->type = QEDR_MR_DMA;
3092 
3093 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3094 	if (rc) {
3095 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
3096 		goto err1;
3097 	}
3098 
3099 	/* index only, 18 bit long, lkey = itid << 8 | key */
3100 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3101 	mr->hw_mr.pd = pd->pd_id;
3102 	mr->hw_mr.local_read = 1;
3103 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3104 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3105 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3106 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3107 	mr->hw_mr.dma_mr = true;
3108 
3109 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3110 	if (rc) {
3111 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3112 		goto err2;
3113 	}
3114 
3115 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3116 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3117 	    mr->hw_mr.remote_atomic)
3118 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3119 
3120 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3121 	return &mr->ibmr;
3122 
3123 err2:
3124 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3125 err1:
3126 	kfree(mr);
3127 	return ERR_PTR(rc);
3128 }
3129 
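/* The WQ is treated as full when advancing prod would make it equal to cons:
 * one slot is always left unused so that prod == cons unambiguously means
 * "empty" (e.g. with max_wr = 4 at most three WRs can be outstanding).
 */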
3130 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3131 {
3132 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3133 }
3134 
3135 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3136 {
3137 	int i, len = 0;
3138 
3139 	for (i = 0; i < num_sge; i++)
3140 		len += sg_list[i].length;
3141 
3142 	return len;
3143 }
3144 
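/* Note: cpu_to_be64(cpu_to_le64(*p)) reduces to an unconditional byte swap on
 * both little- and big-endian hosts; presumably the device expects inline
 * payload in the opposite byte order from the rest of the WQE.
 */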
3145 static void swap_wqe_data64(u64 *p)
3146 {
3147 	int i;
3148 
3149 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3150 		*p = cpu_to_be64(cpu_to_le64(*p));
3151 }
3152 
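/* Copy the WR payload straight into SQ ring elements: data is packed into
 * segments of sizeof(struct rdma_sq_common_wqe) bytes, every completed segment
 * is byte-swapped, and the total inline length is returned (0 with *bad_wr set
 * if the data exceeds ROCE_REQ_MAX_INLINE_DATA_SIZE).
 */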
3153 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3154 				       struct qedr_qp *qp, u8 *wqe_size,
3155 				       const struct ib_send_wr *wr,
3156 				       const struct ib_send_wr **bad_wr,
3157 				       u8 *bits, u8 bit)
3158 {
3159 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3160 	char *seg_prt, *wqe;
3161 	int i, seg_siz;
3162 
3163 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3164 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3165 		*bad_wr = wr;
3166 		return 0;
3167 	}
3168 
3169 	if (!data_size)
3170 		return data_size;
3171 
3172 	*bits |= bit;
3173 
3174 	seg_prt = NULL;
3175 	wqe = NULL;
3176 	seg_siz = 0;
3177 
3178 	/* Copy data inline */
3179 	for (i = 0; i < wr->num_sge; i++) {
3180 		u32 len = wr->sg_list[i].length;
3181 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3182 
3183 		while (len > 0) {
3184 			u32 cur;
3185 
3186 			/* New segment required */
3187 			if (!seg_siz) {
3188 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3189 				seg_prt = wqe;
3190 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3191 				(*wqe_size)++;
3192 			}
3193 
3194 			/* Calculate currently allowed length */
3195 			cur = min_t(u32, len, seg_siz);
3196 			memcpy(seg_prt, src, cur);
3197 
3198 			/* Update segment variables */
3199 			seg_prt += cur;
3200 			seg_siz -= cur;
3201 
3202 			/* Update sge variables */
3203 			src += cur;
3204 			len -= cur;
3205 
3206 			/* Swap fully-completed segments */
3207 			if (!seg_siz)
3208 				swap_wqe_data64((u64 *)wqe);
3209 		}
3210 	}
3211 
3212 	/* Swap the last, partially filled segment */
3213 	if (seg_siz)
3214 		swap_wqe_data64((u64 *)wqe);
3215 
3216 	return data_size;
3217 }
3218 
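/* Helpers that write an RQ SGE, an SRQ WQE header or an SRQ SGE into ring
 * memory in the little-endian layout the device expects.
 */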
3219 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3220 	do {							\
3221 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3222 		(sge)->length = cpu_to_le32(vlength);		\
3223 		(sge)->flags = cpu_to_le32(vflags);		\
3224 	} while (0)
3225 
3226 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3227 	do {							\
3228 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3229 		(hdr)->num_sges = num_sge;			\
3230 	} while (0)
3231 
3232 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3233 	do {							\
3234 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3235 		(sge)->length = cpu_to_le32(vlength);		\
3236 		(sge)->l_key = cpu_to_le32(vlkey);		\
3237 	} while (0)
3238 
3239 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3240 				const struct ib_send_wr *wr)
3241 {
3242 	u32 data_size = 0;
3243 	int i;
3244 
3245 	for (i = 0; i < wr->num_sge; i++) {
3246 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3247 
3248 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3249 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3250 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3251 		data_size += wr->sg_list[i].length;
3252 	}
3253 
3254 	if (wqe_size)
3255 		*wqe_size += wr->num_sge;
3256 
3257 	return data_size;
3258 }
3259 
3260 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3261 				     struct qedr_qp *qp,
3262 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3263 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3264 				     const struct ib_send_wr *wr,
3265 				     const struct ib_send_wr **bad_wr)
3266 {
3267 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3268 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3269 
3270 	if (wr->send_flags & IB_SEND_INLINE &&
3271 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3272 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3273 		u8 flags = 0;
3274 
3275 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3276 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3277 						   bad_wr, &rwqe->flags, flags);
3278 	}
3279 
3280 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3281 }
3282 
3283 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3284 				     struct qedr_qp *qp,
3285 				     struct rdma_sq_send_wqe_1st *swqe,
3286 				     struct rdma_sq_send_wqe_2st *swqe2,
3287 				     const struct ib_send_wr *wr,
3288 				     const struct ib_send_wr **bad_wr)
3289 {
3290 	memset(swqe2, 0, sizeof(*swqe2));
3291 	if (wr->send_flags & IB_SEND_INLINE) {
3292 		u8 flags = 0;
3293 
3294 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3295 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3296 						   bad_wr, &swqe->flags, flags);
3297 	}
3298 
3299 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3300 }
3301 
3302 static int qedr_prepare_reg(struct qedr_qp *qp,
3303 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3304 			    const struct ib_reg_wr *wr)
3305 {
3306 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3307 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3308 
3309 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3310 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3311 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3312 	fwqe1->l_key = wr->key;
3313 
3314 	fwqe2->access_ctrl = 0;
3315 
3316 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3317 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3318 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3319 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3320 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3321 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3322 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3323 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3324 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3325 	fwqe2->fmr_ctrl = 0;
3326 
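	/* The "- 12" suggests the HSI field encodes the page size as log2
	 * relative to a 4K page; this is an inference from the code, not from
	 * the HSI documentation.
	 */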
3327 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3328 		   ilog2(mr->ibmr.page_size) - 12);
3329 
3330 	fwqe2->length_hi = 0;
3331 	fwqe2->length_lo = mr->ibmr.length;
3332 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3333 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3334 
3335 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3336 
3337 	return 0;
3338 }
3339 
3340 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3341 {
3342 	switch (opcode) {
3343 	case IB_WR_RDMA_WRITE:
3344 	case IB_WR_RDMA_WRITE_WITH_IMM:
3345 		return IB_WC_RDMA_WRITE;
3346 	case IB_WR_SEND_WITH_IMM:
3347 	case IB_WR_SEND:
3348 	case IB_WR_SEND_WITH_INV:
3349 		return IB_WC_SEND;
3350 	case IB_WR_RDMA_READ:
3351 	case IB_WR_RDMA_READ_WITH_INV:
3352 		return IB_WC_RDMA_READ;
3353 	case IB_WR_ATOMIC_CMP_AND_SWP:
3354 		return IB_WC_COMP_SWAP;
3355 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3356 		return IB_WC_FETCH_ADD;
3357 	case IB_WR_REG_MR:
3358 		return IB_WC_REG_MR;
3359 	case IB_WR_LOCAL_INV:
3360 		return IB_WC_LOCAL_INV;
3361 	default:
3362 		return IB_WC_SEND;
3363 	}
3364 }
3365 
3366 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3367 				      const struct ib_send_wr *wr)
3368 {
3369 	int wq_is_full, err_wr, pbl_is_full;
3370 	struct qedr_dev *dev = qp->dev;
3371 
3372 	/* prevent SQ overflow and/or processing of a bad WR */
3373 	err_wr = wr->num_sge > qp->sq.max_sges;
3374 	wq_is_full = qedr_wq_is_full(&qp->sq);
3375 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3376 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3377 	if (wq_is_full || err_wr || pbl_is_full) {
3378 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3379 			DP_ERR(dev,
3380 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3381 			       qp);
3382 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3383 		}
3384 
3385 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3386 			DP_ERR(dev,
3387 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3388 			       qp);
3389 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3390 		}
3391 
3392 		if (pbl_is_full &&
3393 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3394 			DP_ERR(dev,
3395 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3396 			       qp);
3397 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3398 		}
3399 		return false;
3400 	}
3401 	return true;
3402 }
3403 
3404 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3405 			    const struct ib_send_wr **bad_wr)
3406 {
3407 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3408 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3409 	struct rdma_sq_atomic_wqe_1st *awqe1;
3410 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3411 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3412 	struct rdma_sq_send_wqe_2st *swqe2;
3413 	struct rdma_sq_local_inv_wqe *iwqe;
3414 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3415 	struct rdma_sq_send_wqe_1st *swqe;
3416 	struct rdma_sq_rdma_wqe_1st *rwqe;
3417 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3418 	struct rdma_sq_common_wqe *wqe;
3419 	u32 length;
3420 	int rc = 0;
3421 	bool comp;
3422 
3423 	if (!qedr_can_post_send(qp, wr)) {
3424 		*bad_wr = wr;
3425 		return -ENOMEM;
3426 	}
3427 
3428 	wqe = qed_chain_produce(&qp->sq.pbl);
3429 	qp->wqe_wr_id[qp->sq.prod].signaled =
3430 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3431 
3432 	wqe->flags = 0;
3433 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3434 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3435 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3436 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3437 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3438 		   !!(wr->send_flags & IB_SEND_FENCE));
3439 	wqe->prev_wqe_size = qp->prev_wqe_size;
3440 
3441 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3442 
3443 	switch (wr->opcode) {
3444 	case IB_WR_SEND_WITH_IMM:
3445 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3446 			rc = -EINVAL;
3447 			*bad_wr = wr;
3448 			break;
3449 		}
3450 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3451 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3452 		swqe->wqe_size = 2;
3453 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3454 
3455 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3456 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3457 						   wr, bad_wr);
3458 		swqe->length = cpu_to_le32(length);
3459 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3460 		qp->prev_wqe_size = swqe->wqe_size;
3461 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3462 		break;
3463 	case IB_WR_SEND:
3464 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3465 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3466 
3467 		swqe->wqe_size = 2;
3468 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3469 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3470 						   wr, bad_wr);
3471 		swqe->length = cpu_to_le32(length);
3472 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3473 		qp->prev_wqe_size = swqe->wqe_size;
3474 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3475 		break;
3476 	case IB_WR_SEND_WITH_INV:
3477 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3478 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3479 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3480 		swqe->wqe_size = 2;
3481 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3482 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3483 						   wr, bad_wr);
3484 		swqe->length = cpu_to_le32(length);
3485 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3486 		qp->prev_wqe_size = swqe->wqe_size;
3487 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3488 		break;
3489 
3490 	case IB_WR_RDMA_WRITE_WITH_IMM:
3491 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3492 			rc = -EINVAL;
3493 			*bad_wr = wr;
3494 			break;
3495 		}
3496 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3497 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3498 
3499 		rwqe->wqe_size = 2;
3500 		rwqe->imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3501 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3502 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3503 						   wr, bad_wr);
3504 		rwqe->length = cpu_to_le32(length);
3505 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3506 		qp->prev_wqe_size = rwqe->wqe_size;
3507 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3508 		break;
3509 	case IB_WR_RDMA_WRITE:
3510 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3511 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3512 
3513 		rwqe->wqe_size = 2;
3514 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3515 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3516 						   wr, bad_wr);
3517 		rwqe->length = cpu_to_le32(length);
3518 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3519 		qp->prev_wqe_size = rwqe->wqe_size;
3520 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3521 		break;
3522 	case IB_WR_RDMA_READ_WITH_INV:
3523 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3524 		/* fallthrough -- the handling is identical to RDMA READ */
3525 
3526 	case IB_WR_RDMA_READ:
3527 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3528 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3529 
3530 		rwqe->wqe_size = 2;
3531 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3532 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3533 						   wr, bad_wr);
3534 		rwqe->length = cpu_to_le32(length);
3535 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3536 		qp->prev_wqe_size = rwqe->wqe_size;
3537 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3538 		break;
3539 
3540 	case IB_WR_ATOMIC_CMP_AND_SWP:
3541 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3542 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3543 		awqe1->wqe_size = 4;
3544 
3545 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3546 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3547 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3548 
3549 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3550 
3551 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3552 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3553 			DMA_REGPAIR_LE(awqe3->swap_data,
3554 				       atomic_wr(wr)->compare_add);
3555 		} else {
3556 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3557 			DMA_REGPAIR_LE(awqe3->swap_data,
3558 				       atomic_wr(wr)->swap);
3559 			DMA_REGPAIR_LE(awqe3->cmp_data,
3560 				       atomic_wr(wr)->compare_add);
3561 		}
3562 
3563 		qedr_prepare_sq_sges(qp, NULL, wr);
3564 
3565 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3566 		qp->prev_wqe_size = awqe1->wqe_size;
3567 		break;
3568 
3569 	case IB_WR_LOCAL_INV:
3570 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3571 		iwqe->wqe_size = 1;
3572 
3573 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3574 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3575 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3576 		qp->prev_wqe_size = iwqe->wqe_size;
3577 		break;
3578 	case IB_WR_REG_MR:
3579 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3580 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3581 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3582 		fwqe1->wqe_size = 2;
3583 
3584 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3585 		if (rc) {
3586 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3587 			*bad_wr = wr;
3588 			break;
3589 		}
3590 
3591 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3592 		qp->prev_wqe_size = fwqe1->wqe_size;
3593 		break;
3594 	default:
3595 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3596 		rc = -EINVAL;
3597 		*bad_wr = wr;
3598 		break;
3599 	}
3600 
3601 	if (*bad_wr) {
3602 		u16 value;
3603 
3604 		/* Restore prod to its position before
3605 		 * this WR was processed
3606 		 */
3607 		value = le16_to_cpu(qp->sq.db_data.data.value);
3608 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3609 
3610 		/* Restore prev_wqe_size */
3611 		qp->prev_wqe_size = wqe->prev_wqe_size;
3612 		rc = -EINVAL;
3613 		DP_ERR(dev, "POST SEND FAILED\n");
3614 	}
3615 
3616 	return rc;
3617 }
3618 
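/* Post a chain of send work requests on the SQ. GSI QPs are handed off to
 * qedr_gsi_post_send(). For RoCE the QP must be in the RTS, SQD or ERR state,
 * otherwise the post is rejected. Each WR is built by __qedr_post_send(); the
 * SQ doorbell is rung once, after the whole chain has been processed.
 */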
3619 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3620 		   const struct ib_send_wr **bad_wr)
3621 {
3622 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3623 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3624 	unsigned long flags;
3625 	int rc = 0;
3626 
3627 	*bad_wr = NULL;
3628 
3629 	if (qp->qp_type == IB_QPT_GSI)
3630 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3631 
3632 	spin_lock_irqsave(&qp->q_lock, flags);
3633 
3634 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3635 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3636 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3637 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3638 			spin_unlock_irqrestore(&qp->q_lock, flags);
3639 			*bad_wr = wr;
3640 			DP_DEBUG(dev, QEDR_MSG_CQ,
3641 				 "QP in wrong state! QP icid=0x%x state %d\n",
3642 				 qp->icid, qp->state);
3643 			return -EINVAL;
3644 		}
3645 	}
3646 
3647 	while (wr) {
3648 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3649 		if (rc)
3650 			break;
3651 
3652 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3653 
3654 		qedr_inc_sw_prod(&qp->sq);
3655 
3656 		qp->sq.db_data.data.value++;
3657 
3658 		wr = wr->next;
3659 	}
3660 
3661 	/* Trigger doorbell
3662 	 * If there was a failure in the first WR then it will be triggered in
3663 	 * vain. However, this is not harmful (as long as the producer value is
3664 	 * unchanged). For performance reasons we avoid checking for this
3665 	 * redundant doorbell.
3666 	 *
3667 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3668 	 * soon as we give the doorbell, we could get a completion
3669 	 * for this wr, therefore we need to make sure that the
3670 	 * memory is updated before giving the doorbell.
3671 	 * During qedr_poll_cq, rmb is called before accessing the
3672 	 * cqe. This covers for the smp_rmb as well.
3673 	 */
3674 	smp_wmb();
3675 	writel(qp->sq.db_data.raw, qp->sq.db);
3676 
3677 	spin_unlock_irqrestore(&qp->q_lock, flags);
3678 
3679 	return rc;
3680 }
3681 
3682 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3683 {
3684 	u32 used;
3685 
3686 	/* Calculate the number of elements used from the producer and
3687 	 * consumer counts, and subtract it from the maximum number of
3688 	 * work requests supported to get the number of elements left.
3689 	 */
3690 	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3691 
3692 	return hw_srq->max_wr - used;
3693 }
3694 
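/* Post receive work requests on an SRQ. Each WR consumes one header element
 * plus one element per SGE from the SRQ PBL chain. The SGE and WQE producer
 * indices are then published through the producer pair at
 * virt_prod_pair_addr (presumably read by the device), with write barriers
 * on either side of the update.
 */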
3695 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3696 		       const struct ib_recv_wr **bad_wr)
3697 {
3698 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3699 	struct qedr_srq_hwq_info *hw_srq;
3700 	struct qedr_dev *dev = srq->dev;
3701 	struct qed_chain *pbl;
3702 	unsigned long flags;
3703 	int status = 0;
3704 	u32 num_sge;
3705 	u32 offset;
3706 
3707 	spin_lock_irqsave(&srq->lock, flags);
3708 
3709 	hw_srq = &srq->hw_srq;
3710 	pbl = &srq->hw_srq.pbl;
3711 	while (wr) {
3712 		struct rdma_srq_wqe_header *hdr;
3713 		int i;
3714 
3715 		if (!qedr_srq_elem_left(hw_srq) ||
3716 		    wr->num_sge > srq->hw_srq.max_sges) {
3717 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3718 			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3719 			       wr->num_sge, srq->hw_srq.max_sges);
3720 			status = -ENOMEM;
3721 			*bad_wr = wr;
3722 			break;
3723 		}
3724 
3725 		hdr = qed_chain_produce(pbl);
3726 		num_sge = wr->num_sge;
3727 		/* Set number of sge and work request id in header */
3728 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3729 
3730 		srq->hw_srq.wr_prod_cnt++;
3731 		hw_srq->wqe_prod++;
3732 		hw_srq->sge_prod++;
3733 
3734 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3735 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3736 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3737 
3738 		for (i = 0; i < wr->num_sge; i++) {
3739 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3740 
3741 			/* Set SGE length, lkey and address */
3742 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3743 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3744 
3745 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3746 				 "[%d]: len %d key %x addr %x:%x\n",
3747 				 i, srq_sge->length, srq_sge->l_key,
3748 				 srq_sge->addr.hi, srq_sge->addr.lo);
3749 			hw_srq->sge_prod++;
3750 		}
3751 
3752 		/* Flush WQE and SGE information before
3753 		 * updating producer.
3754 		 */
3755 		wmb();
3756 
3757 		/* The SRQ producer is 8 bytes: the SGE producer index is in the
3758 		 * first 4 bytes and the WQE producer is in the next 4 bytes.
3760 		 */
3761 		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3762 		offset = offsetof(struct rdma_srq_producers, wqe_prod);
3763 		*((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
3764 			hw_srq->wqe_prod;
3765 
3766 		/* Flush producer after updating it. */
3767 		wmb();
3768 		wr = wr->next;
3769 	}
3770 
3771 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3772 		 qed_chain_get_elem_left(pbl));
3773 	spin_unlock_irqrestore(&srq->lock, flags);
3774 
3775 	return status;
3776 }
3777 
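/* Post receive work requests on a regular (non-SRQ) QP. GSI QPs are handed
 * off to qedr_gsi_post_recv(). Each SGE becomes one RQ PBL element; a WR with
 * no SGEs still posts a single zero-length SGE so that an incoming RDMA write
 * with immediate has an RQ element to consume. The RQ doorbell is rung once
 * per WR, plus a second iWARP-specific doorbell when applicable.
 */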
3778 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3779 		   const struct ib_recv_wr **bad_wr)
3780 {
3781 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3782 	struct qedr_dev *dev = qp->dev;
3783 	unsigned long flags;
3784 	int status = 0;
3785 
3786 	if (qp->qp_type == IB_QPT_GSI)
3787 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3788 
3789 	spin_lock_irqsave(&qp->q_lock, flags);
3790 
3791 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3792 		spin_unlock_irqrestore(&qp->q_lock, flags);
3793 		*bad_wr = wr;
3794 		return -EINVAL;
3795 	}
3796 
3797 	while (wr) {
3798 		int i;
3799 
3800 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3801 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3802 		    wr->num_sge > qp->rq.max_sges) {
3803 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3804 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3805 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3806 			       qp->rq.max_sges);
3807 			status = -ENOMEM;
3808 			*bad_wr = wr;
3809 			break;
3810 		}
3811 		for (i = 0; i < wr->num_sge; i++) {
3812 			u32 flags = 0;
3813 			struct rdma_rq_sge *rqe =
3814 			    qed_chain_produce(&qp->rq.pbl);
3815 
3816 			/* First one must include the number
3817 			 * of SGE in the list
3818 			 */
3819 			if (!i)
3820 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3821 					  wr->num_sge);
3822 
3823 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3824 				  wr->sg_list[i].lkey);
3825 
3826 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3827 				   wr->sg_list[i].length, flags);
3828 		}
3829 
3830 		/* Special case of no SGEs. The FW requires between 1 and 4 SGEs,
3831 		 * so in this case we post a single SGE with length zero. This is
3832 		 * because an RDMA write with immediate consumes an RQ element.
3833 		 */
3834 		if (!wr->num_sge) {
3835 			u32 flags = 0;
3836 			struct rdma_rq_sge *rqe =
3837 			    qed_chain_produce(&qp->rq.pbl);
3838 
3839 			/* First one must include the number
3840 			 * of SGE in the list
3841 			 */
3842 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3843 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3844 
3845 			RQ_SGE_SET(rqe, 0, 0, flags);
3846 			i = 1;
3847 		}
3848 
3849 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3850 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3851 
3852 		qedr_inc_sw_prod(&qp->rq);
3853 
3854 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3855 		 * soon as we give the doorbell, we could get a completion
3856 		 * for this wr, therefore we need to make sure that the
3857 		 * memory is updated before giving the doorbell.
3858 		 * During qedr_poll_cq, rmb is called before accessing the
3859 		 * cqe. This covers for the smp_rmb as well.
3860 		 */
3861 		smp_wmb();
3862 
3863 		qp->rq.db_data.data.value++;
3864 
3865 		writel(qp->rq.db_data.raw, qp->rq.db);
3866 
3867 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
3868 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3870 
3871 		wr = wr->next;
3872 	}
3873 
3874 	spin_unlock_irqrestore(&qp->q_lock, flags);
3875 
3876 	return status;
3877 }
3878 
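/* A CQE still needs processing when its toggle bit matches the toggle value
 * currently tracked for the CQ PBL.
 */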
3879 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3880 {
3881 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3882 
3883 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3884 		cq->pbl_toggle;
3885 }
3886 
3887 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3888 {
3889 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3890 	struct qedr_qp *qp;
3891 
3892 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3893 						   resp_cqe->qp_handle.lo,
3894 						   u64);
3895 	return qp;
3896 }
3897 
3898 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3899 {
3900 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3901 
3902 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3903 }
3904 
3905 /* Return latest CQE (needs processing) */
3906 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3907 {
3908 	return cq->latest_cqe;
3909 }
3910 
3911 /* For FMR we need to increase the FMR "completed" counter used by the FMR
3912  * algorithm to determine whether a PBL can be freed or not.
3913  * We need to do this whether or not the work request was signaled. For
3914  * this purpose we call this function from the condition that checks if a WR
3915  * should be skipped, to make sure we don't miss it (possibly this FMR
3916  * operation was not signaled).
3917  */
3918 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3919 {
3920 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3921 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3922 }
3923 
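/* Generate work completions for SQ entries up to the hardware consumer index
 * hw_cons, bounded by num_entries. Unsignaled WRs are skipped unless @force is
 * set (used when flushing or reporting errors); skipped REG_MR WRs still get
 * their FMR "completed" counter bumped. Returns the number of WCs written.
 */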
3924 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3925 		       struct qedr_cq *cq, int num_entries,
3926 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3927 		       int force)
3928 {
3929 	u16 cnt = 0;
3930 
3931 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3932 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3933 			qedr_chk_if_fmr(qp);
3934 			/* skip WC */
3935 			goto next_cqe;
3936 		}
3937 
3938 		/* fill WC */
3939 		wc->status = status;
3940 		wc->vendor_err = 0;
3941 		wc->wc_flags = 0;
3942 		wc->src_qp = qp->id;
3943 		wc->qp = &qp->ibqp;
3944 
3945 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3946 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3947 
3948 		switch (wc->opcode) {
3949 		case IB_WC_RDMA_WRITE:
3950 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3951 			break;
3952 		case IB_WC_COMP_SWAP:
3953 		case IB_WC_FETCH_ADD:
3954 			wc->byte_len = 8;
3955 			break;
3956 		case IB_WC_REG_MR:
3957 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3958 			break;
3959 		case IB_WC_RDMA_READ:
3960 		case IB_WC_SEND:
3961 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3962 			break;
3963 		default:
3964 			break;
3965 		}
3966 
3967 		num_entries--;
3968 		wc++;
3969 		cnt++;
3970 next_cqe:
3971 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3972 			qed_chain_consume(&qp->sq.pbl);
3973 		qedr_inc_sw_cons(&qp->sq);
3974 	}
3975 
3976 	return cnt;
3977 }
3978 
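/* Handle a requester CQE. Successful and flushed CQEs are reported directly;
 * for any other status the QP is moved to the ERR state, the WRs preceding the
 * failing one are completed successfully and, if room remains in the WC array,
 * one extra WC is filled with the translated error status.
 */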
3979 static int qedr_poll_cq_req(struct qedr_dev *dev,
3980 			    struct qedr_qp *qp, struct qedr_cq *cq,
3981 			    int num_entries, struct ib_wc *wc,
3982 			    struct rdma_cqe_requester *req)
3983 {
3984 	int cnt = 0;
3985 
3986 	switch (req->status) {
3987 	case RDMA_CQE_REQ_STS_OK:
3988 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3989 				  IB_WC_SUCCESS, 0);
3990 		break;
3991 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3992 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3993 			DP_DEBUG(dev, QEDR_MSG_CQ,
3994 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3995 				 cq->icid, qp->icid);
3996 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3997 				  IB_WC_WR_FLUSH_ERR, 1);
3998 		break;
3999 	default:
4000 		/* process all WQEs before the consumer */
4001 		qp->state = QED_ROCE_QP_STATE_ERR;
4002 		cnt = process_req(dev, qp, cq, num_entries, wc,
4003 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
4004 		wc += cnt;
4005 		/* if we have extra WC fill it with actual error info */
4006 		if (cnt < num_entries) {
4007 			enum ib_wc_status wc_status;
4008 
4009 			switch (req->status) {
4010 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4011 				DP_ERR(dev,
4012 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4013 				       cq->icid, qp->icid);
4014 				wc_status = IB_WC_BAD_RESP_ERR;
4015 				break;
4016 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4017 				DP_ERR(dev,
4018 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4019 				       cq->icid, qp->icid);
4020 				wc_status = IB_WC_LOC_LEN_ERR;
4021 				break;
4022 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4023 				DP_ERR(dev,
4024 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4025 				       cq->icid, qp->icid);
4026 				wc_status = IB_WC_LOC_QP_OP_ERR;
4027 				break;
4028 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4029 				DP_ERR(dev,
4030 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4031 				       cq->icid, qp->icid);
4032 				wc_status = IB_WC_LOC_PROT_ERR;
4033 				break;
4034 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4035 				DP_ERR(dev,
4036 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4037 				       cq->icid, qp->icid);
4038 				wc_status = IB_WC_MW_BIND_ERR;
4039 				break;
4040 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4041 				DP_ERR(dev,
4042 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4043 				       cq->icid, qp->icid);
4044 				wc_status = IB_WC_REM_INV_REQ_ERR;
4045 				break;
4046 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4047 				DP_ERR(dev,
4048 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4049 				       cq->icid, qp->icid);
4050 				wc_status = IB_WC_REM_ACCESS_ERR;
4051 				break;
4052 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4053 				DP_ERR(dev,
4054 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4055 				       cq->icid, qp->icid);
4056 				wc_status = IB_WC_REM_OP_ERR;
4057 				break;
4058 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4059 				DP_ERR(dev,
4060 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4061 				       cq->icid, qp->icid);
4062 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4063 				break;
4064 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4065 				DP_ERR(dev,
4066 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4067 				       cq->icid, qp->icid);
4068 				wc_status = IB_WC_RETRY_EXC_ERR;
4069 				break;
4070 			default:
4071 				DP_ERR(dev,
4072 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4073 				       cq->icid, qp->icid);
4074 				wc_status = IB_WC_GENERAL_ERR;
4075 			}
4076 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4077 					   wc_status, 1);
4078 		}
4079 	}
4080 
4081 	return cnt;
4082 }
4083 
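/* Translate a responder CQE status into the matching ib_wc_status value;
 * unknown statuses map to IB_WC_GENERAL_ERR.
 */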
4084 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4085 {
4086 	switch (status) {
4087 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4088 		return IB_WC_LOC_ACCESS_ERR;
4089 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4090 		return IB_WC_LOC_LEN_ERR;
4091 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4092 		return IB_WC_LOC_QP_OP_ERR;
4093 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4094 		return IB_WC_LOC_PROT_ERR;
4095 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4096 		return IB_WC_MW_BIND_ERR;
4097 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4098 		return IB_WC_REM_INV_RD_REQ_ERR;
4099 	case RDMA_CQE_RESP_STS_OK:
4100 		return IB_WC_SUCCESS;
4101 	default:
4102 		return IB_WC_GENERAL_ERR;
4103 	}
4104 }
4105 
4106 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4107 					  struct ib_wc *wc)
4108 {
4109 	wc->status = IB_WC_SUCCESS;
4110 	wc->byte_len = le32_to_cpu(resp->length);
4111 
4112 	if (resp->flags & QEDR_RESP_IMM) {
4113 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4114 		wc->wc_flags |= IB_WC_WITH_IMM;
4115 
4116 		if (resp->flags & QEDR_RESP_RDMA)
4117 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4118 
4119 		if (resp->flags & QEDR_RESP_INV)
4120 			return -EINVAL;
4121 
4122 	} else if (resp->flags & QEDR_RESP_INV) {
4123 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4124 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4125 
4126 		if (resp->flags & QEDR_RESP_RDMA)
4127 			return -EINVAL;
4128 
4129 	} else if (resp->flags & QEDR_RESP_RDMA) {
4130 		return -EINVAL;
4131 	}
4132 
4133 	return 0;
4134 }
4135 
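/* Fill a work completion from a responder CQE: translate the CQE status,
 * decode the immediate/invalidate flags for successful CQEs and set the
 * common WC fields.
 */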
4136 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4137 			       struct qedr_cq *cq, struct ib_wc *wc,
4138 			       struct rdma_cqe_responder *resp, u64 wr_id)
4139 {
4140 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4141 	wc->opcode = IB_WC_RECV;
4142 	wc->wc_flags = 0;
4143 
4144 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4145 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4146 			DP_ERR(dev,
4147 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4148 			       cq, cq->icid, resp->flags);
4149 
4150 	} else {
4151 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4152 		if (wc->status == IB_WC_GENERAL_ERR)
4153 			DP_ERR(dev,
4154 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4155 			       cq, cq->icid, resp->status);
4156 	}
4157 
4158 	/* Fill the rest of the WC */
4159 	wc->vendor_err = 0;
4160 	wc->src_qp = qp->id;
4161 	wc->qp = &qp->ibqp;
4162 	wc->wr_id = wr_id;
4163 }
4164 
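/* Responder completion for an SRQ: the wr_id is carried in the CQE itself.
 * Flushed CQEs are reported directly as IB_WC_WR_FLUSH_ERR; everything else
 * goes through __process_resp_one(). The SRQ consumer count is advanced in
 * either case.
 */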
4165 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4166 				struct qedr_cq *cq, struct ib_wc *wc,
4167 				struct rdma_cqe_responder *resp)
4168 {
4169 	struct qedr_srq *srq = qp->srq;
4170 	u64 wr_id;
4171 
4172 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4173 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4174 
4175 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4176 		wc->status = IB_WC_WR_FLUSH_ERR;
4177 		wc->vendor_err = 0;
4178 		wc->wr_id = wr_id;
4179 		wc->byte_len = 0;
4180 		wc->src_qp = qp->id;
4181 		wc->qp = &qp->ibqp;
4183 	} else {
4184 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4185 	}
4186 	srq->hw_srq.wr_cons_cnt++;
4187 
4188 	return 1;
4189 }
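
/* Responder completion for a regular RQ: the wr_id comes from the driver's
 * rqe_wr_id bookkeeping, and the PBL elements of the completed RQE are
 * consumed before advancing the software consumer.
 */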
4190 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4191 			    struct qedr_cq *cq, struct ib_wc *wc,
4192 			    struct rdma_cqe_responder *resp)
4193 {
4194 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4195 
4196 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4197 
4198 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4199 		qed_chain_consume(&qp->rq.pbl);
4200 	qedr_inc_sw_cons(&qp->rq);
4201 
4202 	return 1;
4203 }
4204 
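/* Flush the RQ: generate IB_WC_WR_FLUSH_ERR completions for posted RQEs up to
 * the hardware consumer index, bounded by num_entries, consuming the
 * corresponding PBL elements. Returns the number of WCs written.
 */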
4205 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4206 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4207 {
4208 	u16 cnt = 0;
4209 
4210 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4211 		/* fill WC */
4212 		wc->status = IB_WC_WR_FLUSH_ERR;
4213 		wc->vendor_err = 0;
4214 		wc->wc_flags = 0;
4215 		wc->src_qp = qp->id;
4216 		wc->byte_len = 0;
4217 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4218 		wc->qp = &qp->ibqp;
4219 		num_entries--;
4220 		wc++;
4221 		cnt++;
4222 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4223 			qed_chain_consume(&qp->rq.pbl);
4224 		qedr_inc_sw_cons(&qp->rq);
4225 	}
4226 
4227 	return cnt;
4228 }
4229 
4230 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4231 				 struct rdma_cqe_responder *resp, int *update)
4232 {
4233 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4234 		consume_cqe(cq);
4235 		*update |= 1;
4236 	}
4237 }
4238 
4239 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4240 				 struct qedr_cq *cq, int num_entries,
4241 				 struct ib_wc *wc,
4242 				 struct rdma_cqe_responder *resp)
4243 {
4244 	int cnt;
4245 
4246 	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4247 	consume_cqe(cq);
4248 
4249 	return cnt;
4250 }
4251 
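/* Dispatch a responder CQE for a regular RQ: a flushed CQE flushes the RQ up
 * to the reported consumer index, otherwise a single completion is generated
 * and the CQE is consumed.
 */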
4252 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4253 			     struct qedr_cq *cq, int num_entries,
4254 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4255 			     int *update)
4256 {
4257 	int cnt;
4258 
4259 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4260 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4261 					 resp->rq_cons_or_srq_id);
4262 		try_consume_resp_cqe(cq, qp, resp, update);
4263 	} else {
4264 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4265 		consume_cqe(cq);
4266 		*update |= 1;
4267 	}
4268 
4269 	return cnt;
4270 }
4271 
4272 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4273 				struct rdma_cqe_requester *req, int *update)
4274 {
4275 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4276 		consume_cqe(cq);
4277 		*update |= 1;
4278 	}
4279 }
4280 
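/* Poll up to num_entries completions from the CQ. GSI CQs are handed off to
 * qedr_gsi_poll_cq(). Valid CQEs are dispatched by type (requester, responder
 * RQ, responder SRQ); the CQ consumer index is advanced by the number of PBL
 * elements consumed, and the CQ doorbell is updated once at the end if
 * anything was consumed.
 */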
4281 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4282 {
4283 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4284 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4285 	union rdma_cqe *cqe;
4286 	u32 old_cons, new_cons;
4287 	unsigned long flags;
4288 	int update = 0;
4289 	int done = 0;
4290 
4291 	if (cq->destroyed) {
4292 		DP_ERR(dev,
4293 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4294 		       cq, cq->icid);
4295 		return 0;
4296 	}
4297 
4298 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4299 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4300 
4301 	spin_lock_irqsave(&cq->cq_lock, flags);
4302 	cqe = cq->latest_cqe;
4303 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4304 	while (num_entries && is_valid_cqe(cq, cqe)) {
4305 		struct qedr_qp *qp;
4306 		int cnt = 0;
4307 
4308 		/* prevent speculative reads of any field of CQE */
4309 		rmb();
4310 
4311 		qp = cqe_get_qp(cqe);
4312 		if (!qp) {
4313 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4314 			break;
4315 		}
4316 
4317 		wc->qp = &qp->ibqp;
4318 
4319 		switch (cqe_get_type(cqe)) {
4320 		case RDMA_CQE_TYPE_REQUESTER:
4321 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4322 					       &cqe->req);
4323 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4324 			break;
4325 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4326 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4327 						&cqe->resp, &update);
4328 			break;
4329 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4330 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4331 						    wc, &cqe->resp);
4332 			update = 1;
4333 			break;
4334 		case RDMA_CQE_TYPE_INVALID:
4335 		default:
4336 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4337 			       cqe_get_type(cqe));
4338 		}
4339 		num_entries -= cnt;
4340 		wc += cnt;
4341 		done += cnt;
4342 
4343 		cqe = get_cqe(cq);
4344 	}
4345 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4346 
4347 	cq->cq_cons += new_cons - old_cons;
4348 
4349 	if (update)
4350 		/* doorbell notifies about the latest VALID entry,
4351 		 * but the chain already points to the next INVALID one
4352 		 */
4353 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4354 
4355 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4356 	return done;
4357 }
4358 
4359 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4360 		     u8 port_num, const struct ib_wc *in_wc,
4361 		     const struct ib_grh *in_grh, const struct ib_mad *in,
4362 		     struct ib_mad *out_mad, size_t *out_mad_size,
4363 		     u16 *out_mad_pkey_index)
4364 {
4365 	return IB_MAD_RESULT_SUCCESS;
4366 }
4367