xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 2359ccdd)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_roce_cm.h"
53 
54 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 
56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
57 					size_t len)
58 {
59 	size_t min_len = min_t(size_t, len, udata->outlen);
60 
61 	return ib_copy_to_udata(udata, src, min_len);
62 }
63 
64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
65 {
66 	if (index > QEDR_ROCE_PKEY_TABLE_LEN)
67 		return -EINVAL;
68 
69 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
70 	return 0;
71 }
72 
73 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
74 		      int index, union ib_gid *sgid)
75 {
76 	struct qedr_dev *dev = get_qedr_dev(ibdev);
77 
78 	memset(sgid->raw, 0, sizeof(sgid->raw));
79 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
80 
81 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
82 		 sgid->global.interface_id, sgid->global.subnet_prefix);
83 
84 	return 0;
85 }
86 
87 int qedr_query_device(struct ib_device *ibdev,
88 		      struct ib_device_attr *attr, struct ib_udata *udata)
89 {
90 	struct qedr_dev *dev = get_qedr_dev(ibdev);
91 	struct qedr_device_attr *qattr = &dev->attr;
92 
93 	if (!dev->rdma_ctx) {
94 		DP_ERR(dev,
95 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
96 		       dev->rdma_ctx);
97 		return -EINVAL;
98 	}
99 
100 	memset(attr, 0, sizeof(*attr));
101 
102 	attr->fw_ver = qattr->fw_ver;
103 	attr->sys_image_guid = qattr->sys_image_guid;
104 	attr->max_mr_size = qattr->max_mr_size;
105 	attr->page_size_cap = qattr->page_size_caps;
106 	attr->vendor_id = qattr->vendor_id;
107 	attr->vendor_part_id = qattr->vendor_part_id;
108 	attr->hw_ver = qattr->hw_ver;
109 	attr->max_qp = qattr->max_qp;
110 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
111 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
112 	    IB_DEVICE_RC_RNR_NAK_GEN |
113 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
114 
115 	attr->max_sge = qattr->max_sge;
116 	attr->max_sge_rd = qattr->max_sge;
117 	attr->max_cq = qattr->max_cq;
118 	attr->max_cqe = qattr->max_cqe;
119 	attr->max_mr = qattr->max_mr;
120 	attr->max_mw = qattr->max_mw;
121 	attr->max_pd = qattr->max_pd;
122 	attr->atomic_cap = dev->atomic_cap;
123 	attr->max_fmr = qattr->max_fmr;
124 	attr->max_map_per_fmr = 16;
125 	attr->max_qp_init_rd_atom =
126 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
127 	attr->max_qp_rd_atom =
128 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
129 		attr->max_qp_init_rd_atom);
130 
131 	attr->max_srq = qattr->max_srq;
132 	attr->max_srq_sge = qattr->max_srq_sge;
133 	attr->max_srq_wr = qattr->max_srq_wr;
134 
135 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
136 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
137 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
138 	attr->max_ah = qattr->max_ah;
139 
140 	return 0;
141 }
142 
143 #define QEDR_SPEED_SDR		(1)
144 #define QEDR_SPEED_DDR		(2)
145 #define QEDR_SPEED_QDR		(4)
146 #define QEDR_SPEED_FDR10	(8)
147 #define QEDR_SPEED_FDR		(16)
148 #define QEDR_SPEED_EDR		(32)
149 
150 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
151 					    u8 *ib_width)
152 {
153 	switch (speed) {
154 	case 1000:
155 		*ib_speed = QEDR_SPEED_SDR;
156 		*ib_width = IB_WIDTH_1X;
157 		break;
158 	case 10000:
159 		*ib_speed = QEDR_SPEED_QDR;
160 		*ib_width = IB_WIDTH_1X;
161 		break;
162 
163 	case 20000:
164 		*ib_speed = QEDR_SPEED_DDR;
165 		*ib_width = IB_WIDTH_4X;
166 		break;
167 
168 	case 25000:
169 		*ib_speed = QEDR_SPEED_EDR;
170 		*ib_width = IB_WIDTH_1X;
171 		break;
172 
173 	case 40000:
174 		*ib_speed = QEDR_SPEED_QDR;
175 		*ib_width = IB_WIDTH_4X;
176 		break;
177 
178 	case 50000:
179 		*ib_speed = QEDR_SPEED_QDR;
180 		*ib_width = IB_WIDTH_4X;
181 		break;
182 
183 	case 100000:
184 		*ib_speed = QEDR_SPEED_EDR;
185 		*ib_width = IB_WIDTH_4X;
186 		break;
187 
188 	default:
189 		/* Unsupported */
190 		*ib_speed = QEDR_SPEED_SDR;
191 		*ib_width = IB_WIDTH_1X;
192 	}
193 }
194 
195 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
196 {
197 	struct qedr_dev *dev;
198 	struct qed_rdma_port *rdma_port;
199 
200 	dev = get_qedr_dev(ibdev);
201 	if (port > 1) {
202 		DP_ERR(dev, "invalid_port=0x%x\n", port);
203 		return -EINVAL;
204 	}
205 
206 	if (!dev->rdma_ctx) {
207 		DP_ERR(dev, "rdma_ctx is NULL\n");
208 		return -EINVAL;
209 	}
210 
211 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
212 
213 	/* *attr being zeroed by the caller, avoid zeroing it here */
214 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
215 		attr->state = IB_PORT_ACTIVE;
216 		attr->phys_state = 5;
217 	} else {
218 		attr->state = IB_PORT_DOWN;
219 		attr->phys_state = 3;
220 	}
221 	attr->max_mtu = IB_MTU_4096;
222 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
223 	attr->lid = 0;
224 	attr->lmc = 0;
225 	attr->sm_lid = 0;
226 	attr->sm_sl = 0;
227 	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
228 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
229 		attr->gid_tbl_len = 1;
230 		attr->pkey_tbl_len = 1;
231 	} else {
232 		attr->gid_tbl_len = QEDR_MAX_SGID;
233 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
234 	}
235 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
236 	attr->qkey_viol_cntr = 0;
237 	get_link_speed_and_width(rdma_port->link_speed,
238 				 &attr->active_speed, &attr->active_width);
239 	attr->max_msg_sz = rdma_port->max_msg_size;
240 	attr->max_vl_num = 4;
241 
242 	return 0;
243 }
244 
245 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
246 		     struct ib_port_modify *props)
247 {
248 	struct qedr_dev *dev;
249 
250 	dev = get_qedr_dev(ibdev);
251 	if (port > 1) {
252 		DP_ERR(dev, "invalid_port=0x%x\n", port);
253 		return -EINVAL;
254 	}
255 
256 	return 0;
257 }
258 
259 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
260 			 unsigned long len)
261 {
262 	struct qedr_mm *mm;
263 
264 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
265 	if (!mm)
266 		return -ENOMEM;
267 
268 	mm->key.phy_addr = phy_addr;
269 	/* This function might be called with a length which is not a multiple
270 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
271 	 * forces this granularity by increasing the requested size if needed.
272 	 * When qedr_mmap is called, it will search the list with the updated
273 	 * length as a key. To prevent search failures, the length is rounded up
274 	 * in advance to PAGE_SIZE.
275 	 */
276 	mm->key.len = roundup(len, PAGE_SIZE);
277 	INIT_LIST_HEAD(&mm->entry);
278 
279 	mutex_lock(&uctx->mm_list_lock);
280 	list_add(&mm->entry, &uctx->mm_head);
281 	mutex_unlock(&uctx->mm_list_lock);
282 
283 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
284 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
285 		 (unsigned long long)mm->key.phy_addr,
286 		 (unsigned long)mm->key.len, uctx);
287 
288 	return 0;
289 }
290 
291 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
292 			     unsigned long len)
293 {
294 	bool found = false;
295 	struct qedr_mm *mm;
296 
297 	mutex_lock(&uctx->mm_list_lock);
298 	list_for_each_entry(mm, &uctx->mm_head, entry) {
299 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
300 			continue;
301 
302 		found = true;
303 		break;
304 	}
305 	mutex_unlock(&uctx->mm_list_lock);
306 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
307 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
308 		 mm->key.phy_addr, mm->key.len, uctx, found);
309 
310 	return found;
311 }
312 
313 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
314 					struct ib_udata *udata)
315 {
316 	int rc;
317 	struct qedr_ucontext *ctx;
318 	struct qedr_alloc_ucontext_resp uresp;
319 	struct qedr_dev *dev = get_qedr_dev(ibdev);
320 	struct qed_rdma_add_user_out_params oparams;
321 
322 	if (!udata)
323 		return ERR_PTR(-EFAULT);
324 
325 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
326 	if (!ctx)
327 		return ERR_PTR(-ENOMEM);
328 
329 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
330 	if (rc) {
331 		DP_ERR(dev,
332 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
333 		       rc);
334 		goto err;
335 	}
336 
337 	ctx->dpi = oparams.dpi;
338 	ctx->dpi_addr = oparams.dpi_addr;
339 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
340 	ctx->dpi_size = oparams.dpi_size;
341 	INIT_LIST_HEAD(&ctx->mm_head);
342 	mutex_init(&ctx->mm_list_lock);
343 
344 	memset(&uresp, 0, sizeof(uresp));
345 
346 	uresp.dpm_enabled = dev->user_dpm_enabled;
347 	uresp.wids_enabled = 1;
348 	uresp.wid_count = oparams.wid_count;
349 	uresp.db_pa = ctx->dpi_phys_addr;
350 	uresp.db_size = ctx->dpi_size;
351 	uresp.max_send_wr = dev->attr.max_sqe;
352 	uresp.max_recv_wr = dev->attr.max_rqe;
353 	uresp.max_srq_wr = dev->attr.max_srq_wr;
354 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
355 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
356 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
357 	uresp.max_cqes = QEDR_MAX_CQES;
358 
359 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
360 	if (rc)
361 		goto err;
362 
363 	ctx->dev = dev;
364 
365 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
366 	if (rc)
367 		goto err;
368 
369 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
370 		 &ctx->ibucontext);
371 	return &ctx->ibucontext;
372 
373 err:
374 	kfree(ctx);
375 	return ERR_PTR(rc);
376 }
377 
378 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
379 {
380 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
381 	struct qedr_mm *mm, *tmp;
382 	int status = 0;
383 
384 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
385 		 uctx);
386 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
387 
388 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
389 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
390 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
391 			 mm->key.phy_addr, mm->key.len, uctx);
392 		list_del(&mm->entry);
393 		kfree(mm);
394 	}
395 
396 	kfree(uctx);
397 	return status;
398 }
399 
400 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
401 {
402 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
403 	struct qedr_dev *dev = get_qedr_dev(context->device);
404 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
405 	u64 unmapped_db = dev->db_phys_addr;
406 	unsigned long len = (vma->vm_end - vma->vm_start);
407 	int rc = 0;
408 	bool found;
409 
410 	DP_DEBUG(dev, QEDR_MSG_INIT,
411 		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
412 		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
413 	if (vma->vm_start & (PAGE_SIZE - 1)) {
414 		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
415 		       vma->vm_start);
416 		return -EINVAL;
417 	}
418 
419 	found = qedr_search_mmap(ucontext, vm_page, len);
420 	if (!found) {
421 		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
422 		       vma->vm_pgoff);
423 		return -EINVAL;
424 	}
425 
426 	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
427 
428 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
429 						     dev->db_size))) {
430 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
431 		if (vma->vm_flags & VM_READ) {
432 			DP_ERR(dev, "Trying to map doorbell bar for read\n");
433 			return -EPERM;
434 		}
435 
436 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
437 
438 		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
439 					PAGE_SIZE, vma->vm_page_prot);
440 	} else {
441 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
442 		rc = remap_pfn_range(vma, vma->vm_start,
443 				     vma->vm_pgoff, len, vma->vm_page_prot);
444 	}
445 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
446 	return rc;
447 }
448 
449 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
450 			    struct ib_ucontext *context, struct ib_udata *udata)
451 {
452 	struct qedr_dev *dev = get_qedr_dev(ibdev);
453 	struct qedr_pd *pd;
454 	u16 pd_id;
455 	int rc;
456 
457 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
458 		 (udata && context) ? "User Lib" : "Kernel");
459 
460 	if (!dev->rdma_ctx) {
461 		DP_ERR(dev, "invalid RDMA context\n");
462 		return ERR_PTR(-EINVAL);
463 	}
464 
465 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
466 	if (!pd)
467 		return ERR_PTR(-ENOMEM);
468 
469 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
470 	if (rc)
471 		goto err;
472 
473 	pd->pd_id = pd_id;
474 
475 	if (udata && context) {
476 		struct qedr_alloc_pd_uresp uresp = {
477 			.pd_id = pd_id,
478 		};
479 
480 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
481 		if (rc) {
482 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
483 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
484 			goto err;
485 		}
486 
487 		pd->uctx = get_qedr_ucontext(context);
488 		pd->uctx->pd = pd;
489 	}
490 
491 	return &pd->ibpd;
492 
493 err:
494 	kfree(pd);
495 	return ERR_PTR(rc);
496 }
497 
498 int qedr_dealloc_pd(struct ib_pd *ibpd)
499 {
500 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
501 	struct qedr_pd *pd = get_qedr_pd(ibpd);
502 
503 	if (!pd) {
504 		pr_err("Invalid PD received in dealloc_pd\n");
505 		return -EINVAL;
506 	}
507 
508 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
509 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
510 
511 	kfree(pd);
512 
513 	return 0;
514 }
515 
516 static void qedr_free_pbl(struct qedr_dev *dev,
517 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
518 {
519 	struct pci_dev *pdev = dev->pdev;
520 	int i;
521 
522 	for (i = 0; i < pbl_info->num_pbls; i++) {
523 		if (!pbl[i].va)
524 			continue;
525 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
526 				  pbl[i].va, pbl[i].pa);
527 	}
528 
529 	kfree(pbl);
530 }
531 
532 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
533 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
534 
535 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
536 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
537 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
538 
539 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
540 					   struct qedr_pbl_info *pbl_info,
541 					   gfp_t flags)
542 {
543 	struct pci_dev *pdev = dev->pdev;
544 	struct qedr_pbl *pbl_table;
545 	dma_addr_t *pbl_main_tbl;
546 	dma_addr_t pa;
547 	void *va;
548 	int i;
549 
550 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
551 	if (!pbl_table)
552 		return ERR_PTR(-ENOMEM);
553 
554 	for (i = 0; i < pbl_info->num_pbls; i++) {
555 		va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
556 					 &pa, flags);
557 		if (!va)
558 			goto err;
559 
560 		pbl_table[i].va = va;
561 		pbl_table[i].pa = pa;
562 	}
563 
564 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
565 	 * the first one with physical pointers to all of the rest
566 	 */
567 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
568 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
569 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
570 
571 	return pbl_table;
572 
573 err:
574 	for (i--; i >= 0; i--)
575 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
576 				  pbl_table[i].va, pbl_table[i].pa);
577 
578 	qedr_free_pbl(dev, pbl_info, pbl_table);
579 
580 	return ERR_PTR(-ENOMEM);
581 }
582 
583 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
584 				struct qedr_pbl_info *pbl_info,
585 				u32 num_pbes, int two_layer_capable)
586 {
587 	u32 pbl_capacity;
588 	u32 pbl_size;
589 	u32 num_pbls;
590 
591 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
592 		if (num_pbes > MAX_PBES_TWO_LAYER) {
593 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
594 			       num_pbes);
595 			return -EINVAL;
596 		}
597 
598 		/* calculate required pbl page size */
599 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
600 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
601 			       NUM_PBES_ON_PAGE(pbl_size);
602 
603 		while (pbl_capacity < num_pbes) {
604 			pbl_size *= 2;
605 			pbl_capacity = pbl_size / sizeof(u64);
606 			pbl_capacity = pbl_capacity * pbl_capacity;
607 		}
608 
609 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
610 		num_pbls++;	/* One for the layer0 ( points to the pbls) */
611 		pbl_info->two_layered = true;
612 	} else {
613 		/* One layered PBL */
614 		num_pbls = 1;
615 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
616 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
617 		pbl_info->two_layered = false;
618 	}
619 
620 	pbl_info->num_pbls = num_pbls;
621 	pbl_info->pbl_size = pbl_size;
622 	pbl_info->num_pbes = num_pbes;
623 
624 	DP_DEBUG(dev, QEDR_MSG_MR,
625 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
626 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
627 
628 	return 0;
629 }
630 
631 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
632 			       struct qedr_pbl *pbl,
633 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
634 {
635 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
636 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
637 	struct qedr_pbl *pbl_tbl;
638 	struct scatterlist *sg;
639 	struct regpair *pbe;
640 	u64 pg_addr;
641 	int entry;
642 
643 	if (!pbl_info->num_pbes)
644 		return;
645 
646 	/* If we have a two layered pbl, the first pbl points to the rest
647 	 * of the pbls and the first entry lays on the second pbl in the table
648 	 */
649 	if (pbl_info->two_layered)
650 		pbl_tbl = &pbl[1];
651 	else
652 		pbl_tbl = pbl;
653 
654 	pbe = (struct regpair *)pbl_tbl->va;
655 	if (!pbe) {
656 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
657 		return;
658 	}
659 
660 	pbe_cnt = 0;
661 
662 	shift = umem->page_shift;
663 
664 	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
665 
666 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
667 		pages = sg_dma_len(sg) >> shift;
668 		pg_addr = sg_dma_address(sg);
669 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
670 			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
671 				pbe->lo = cpu_to_le32(pg_addr);
672 				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
673 
674 				pg_addr += BIT(pg_shift);
675 				pbe_cnt++;
676 				total_num_pbes++;
677 				pbe++;
678 
679 				if (total_num_pbes == pbl_info->num_pbes)
680 					return;
681 
682 				/* If the given pbl is full storing the pbes,
683 				 * move to next pbl.
684 				 */
685 				if (pbe_cnt ==
686 				    (pbl_info->pbl_size / sizeof(u64))) {
687 					pbl_tbl++;
688 					pbe = (struct regpair *)pbl_tbl->va;
689 					pbe_cnt = 0;
690 				}
691 
692 				fw_pg_cnt++;
693 			}
694 		}
695 	}
696 }
697 
698 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
699 			      struct qedr_cq *cq, struct ib_udata *udata)
700 {
701 	struct qedr_create_cq_uresp uresp;
702 	int rc;
703 
704 	memset(&uresp, 0, sizeof(uresp));
705 
706 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
707 	uresp.icid = cq->icid;
708 
709 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
710 	if (rc)
711 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
712 
713 	return rc;
714 }
715 
716 static void consume_cqe(struct qedr_cq *cq)
717 {
718 	if (cq->latest_cqe == cq->toggle_cqe)
719 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
720 
721 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
722 }
723 
724 static inline int qedr_align_cq_entries(int entries)
725 {
726 	u64 size, aligned_size;
727 
728 	/* We allocate an extra entry that we don't report to the FW. */
729 	size = (entries + 1) * QEDR_CQE_SIZE;
730 	aligned_size = ALIGN(size, PAGE_SIZE);
731 
732 	return aligned_size / QEDR_CQE_SIZE;
733 }
734 
735 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
736 				       struct qedr_dev *dev,
737 				       struct qedr_userq *q,
738 				       u64 buf_addr, size_t buf_len,
739 				       int access, int dmasync,
740 				       int alloc_and_init)
741 {
742 	u32 fw_pages;
743 	int rc;
744 
745 	q->buf_addr = buf_addr;
746 	q->buf_len = buf_len;
747 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
748 	if (IS_ERR(q->umem)) {
749 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
750 		       PTR_ERR(q->umem));
751 		return PTR_ERR(q->umem);
752 	}
753 
754 	fw_pages = ib_umem_page_count(q->umem) <<
755 	    (q->umem->page_shift - FW_PAGE_SHIFT);
756 
757 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
758 	if (rc)
759 		goto err0;
760 
761 	if (alloc_and_init) {
762 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
763 		if (IS_ERR(q->pbl_tbl)) {
764 			rc = PTR_ERR(q->pbl_tbl);
765 			goto err0;
766 		}
767 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
768 				   FW_PAGE_SHIFT);
769 	} else {
770 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
771 		if (!q->pbl_tbl) {
772 			rc = -ENOMEM;
773 			goto err0;
774 		}
775 	}
776 
777 	return 0;
778 
779 err0:
780 	ib_umem_release(q->umem);
781 	q->umem = NULL;
782 
783 	return rc;
784 }
785 
786 static inline void qedr_init_cq_params(struct qedr_cq *cq,
787 				       struct qedr_ucontext *ctx,
788 				       struct qedr_dev *dev, int vector,
789 				       int chain_entries, int page_cnt,
790 				       u64 pbl_ptr,
791 				       struct qed_rdma_create_cq_in_params
792 				       *params)
793 {
794 	memset(params, 0, sizeof(*params));
795 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
796 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
797 	params->cnq_id = vector;
798 	params->cq_size = chain_entries - 1;
799 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
800 	params->pbl_num_pages = page_cnt;
801 	params->pbl_ptr = pbl_ptr;
802 	params->pbl_two_level = 0;
803 }
804 
805 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
806 {
807 	cq->db.data.agg_flags = flags;
808 	cq->db.data.value = cpu_to_le32(cons);
809 	writeq(cq->db.raw, cq->db_addr);
810 
811 	/* Make sure write would stick */
812 	mmiowb();
813 }
814 
815 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
816 {
817 	struct qedr_cq *cq = get_qedr_cq(ibcq);
818 	unsigned long sflags;
819 	struct qedr_dev *dev;
820 
821 	dev = get_qedr_dev(ibcq->device);
822 
823 	if (cq->destroyed) {
824 		DP_ERR(dev,
825 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
826 		       cq, cq->icid);
827 		return -EINVAL;
828 	}
829 
830 
831 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
832 		return 0;
833 
834 	spin_lock_irqsave(&cq->cq_lock, sflags);
835 
836 	cq->arm_flags = 0;
837 
838 	if (flags & IB_CQ_SOLICITED)
839 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
840 
841 	if (flags & IB_CQ_NEXT_COMP)
842 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
843 
844 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
845 
846 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
847 
848 	return 0;
849 }
850 
851 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
852 			     const struct ib_cq_init_attr *attr,
853 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
854 {
855 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
856 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
857 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
858 	struct qedr_dev *dev = get_qedr_dev(ibdev);
859 	struct qed_rdma_create_cq_in_params params;
860 	struct qedr_create_cq_ureq ureq;
861 	int vector = attr->comp_vector;
862 	int entries = attr->cqe;
863 	struct qedr_cq *cq;
864 	int chain_entries;
865 	int page_cnt;
866 	u64 pbl_ptr;
867 	u16 icid;
868 	int rc;
869 
870 	DP_DEBUG(dev, QEDR_MSG_INIT,
871 		 "create_cq: called from %s. entries=%d, vector=%d\n",
872 		 udata ? "User Lib" : "Kernel", entries, vector);
873 
874 	if (entries > QEDR_MAX_CQES) {
875 		DP_ERR(dev,
876 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
877 		       entries, QEDR_MAX_CQES);
878 		return ERR_PTR(-EINVAL);
879 	}
880 
881 	chain_entries = qedr_align_cq_entries(entries);
882 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
883 
884 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
885 	if (!cq)
886 		return ERR_PTR(-ENOMEM);
887 
888 	if (udata) {
889 		memset(&ureq, 0, sizeof(ureq));
890 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
891 			DP_ERR(dev,
892 			       "create cq: problem copying data from user space\n");
893 			goto err0;
894 		}
895 
896 		if (!ureq.len) {
897 			DP_ERR(dev,
898 			       "create cq: cannot create a cq with 0 entries\n");
899 			goto err0;
900 		}
901 
902 		cq->cq_type = QEDR_CQ_TYPE_USER;
903 
904 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
905 					  ureq.len, IB_ACCESS_LOCAL_WRITE,
906 					  1, 1);
907 		if (rc)
908 			goto err0;
909 
910 		pbl_ptr = cq->q.pbl_tbl->pa;
911 		page_cnt = cq->q.pbl_info.num_pbes;
912 
913 		cq->ibcq.cqe = chain_entries;
914 	} else {
915 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
916 
917 		rc = dev->ops->common->chain_alloc(dev->cdev,
918 						   QED_CHAIN_USE_TO_CONSUME,
919 						   QED_CHAIN_MODE_PBL,
920 						   QED_CHAIN_CNT_TYPE_U32,
921 						   chain_entries,
922 						   sizeof(union rdma_cqe),
923 						   &cq->pbl, NULL);
924 		if (rc)
925 			goto err1;
926 
927 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
928 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
929 		cq->ibcq.cqe = cq->pbl.capacity;
930 	}
931 
932 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
933 			    pbl_ptr, &params);
934 
935 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
936 	if (rc)
937 		goto err2;
938 
939 	cq->icid = icid;
940 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
941 	spin_lock_init(&cq->cq_lock);
942 
943 	if (ib_ctx) {
944 		rc = qedr_copy_cq_uresp(dev, cq, udata);
945 		if (rc)
946 			goto err3;
947 	} else {
948 		/* Generate doorbell address. */
949 		cq->db_addr = dev->db_addr +
950 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
951 		cq->db.data.icid = cq->icid;
952 		cq->db.data.params = DB_AGG_CMD_SET <<
953 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
954 
955 		/* point to the very last element, passing it we will toggle */
956 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
957 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
958 		cq->latest_cqe = NULL;
959 		consume_cqe(cq);
960 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
961 	}
962 
963 	DP_DEBUG(dev, QEDR_MSG_CQ,
964 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
965 		 cq->icid, cq, params.cq_size);
966 
967 	return &cq->ibcq;
968 
969 err3:
970 	destroy_iparams.icid = cq->icid;
971 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
972 				  &destroy_oparams);
973 err2:
974 	if (udata)
975 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
976 	else
977 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
978 err1:
979 	if (udata)
980 		ib_umem_release(cq->q.umem);
981 err0:
982 	kfree(cq);
983 	return ERR_PTR(-EINVAL);
984 }
985 
986 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
987 {
988 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
989 	struct qedr_cq *cq = get_qedr_cq(ibcq);
990 
991 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
992 
993 	return 0;
994 }
995 
996 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
997 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
998 
999 int qedr_destroy_cq(struct ib_cq *ibcq)
1000 {
1001 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1002 	struct qed_rdma_destroy_cq_out_params oparams;
1003 	struct qed_rdma_destroy_cq_in_params iparams;
1004 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1005 	int iter;
1006 	int rc;
1007 
1008 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1009 
1010 	cq->destroyed = 1;
1011 
1012 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
1013 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1014 		goto done;
1015 
1016 	iparams.icid = cq->icid;
1017 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1018 	if (rc)
1019 		return rc;
1020 
1021 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1022 
1023 	if (ibcq->uobject && ibcq->uobject->context) {
1024 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1025 		ib_umem_release(cq->q.umem);
1026 	}
1027 
1028 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1029 	 * wait until all CNQ interrupts, if any, are received. This will always
1030 	 * happen and will always happen very fast. If not, then a serious error
1031 	 * has occured. That is why we can use a long delay.
1032 	 * We spin for a short time so we don’t lose time on context switching
1033 	 * in case all the completions are handled in that span. Otherwise
1034 	 * we sleep for a while and check again. Since the CNQ may be
1035 	 * associated with (only) the current CPU we use msleep to allow the
1036 	 * current CPU to be freed.
1037 	 * The CNQ notification is increased in qedr_irq_handler().
1038 	 */
1039 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1040 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1041 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1042 		iter--;
1043 	}
1044 
1045 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1046 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1047 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1048 		iter--;
1049 	}
1050 
1051 	if (oparams.num_cq_notif != cq->cnq_notif)
1052 		goto err;
1053 
1054 	/* Note that we don't need to have explicit code to wait for the
1055 	 * completion of the event handler because it is invoked from the EQ.
1056 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1057 	 * be certain that there's no event handler in process.
1058 	 */
1059 done:
1060 	cq->sig = ~cq->sig;
1061 
1062 	kfree(cq);
1063 
1064 	return 0;
1065 
1066 err:
1067 	DP_ERR(dev,
1068 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1069 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1070 
1071 	return -EINVAL;
1072 }
1073 
1074 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1075 					  struct ib_qp_attr *attr,
1076 					  int attr_mask,
1077 					  struct qed_rdma_modify_qp_in_params
1078 					  *qp_params)
1079 {
1080 	enum rdma_network_type nw_type;
1081 	struct ib_gid_attr gid_attr;
1082 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1083 	union ib_gid gid;
1084 	u32 ipv4_addr;
1085 	int rc = 0;
1086 	int i;
1087 
1088 	rc = ib_get_cached_gid(ibqp->device,
1089 			       rdma_ah_get_port_num(&attr->ah_attr),
1090 			       grh->sgid_index, &gid, &gid_attr);
1091 	if (rc)
1092 		return rc;
1093 
1094 	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1095 
1096 	dev_put(gid_attr.ndev);
1097 	nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1098 	switch (nw_type) {
1099 	case RDMA_NETWORK_IPV6:
1100 		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1101 		       sizeof(qp_params->sgid));
1102 		memcpy(&qp_params->dgid.bytes[0],
1103 		       &grh->dgid,
1104 		       sizeof(qp_params->dgid));
1105 		qp_params->roce_mode = ROCE_V2_IPV6;
1106 		SET_FIELD(qp_params->modify_flags,
1107 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1108 		break;
1109 	case RDMA_NETWORK_IB:
1110 		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1111 		       sizeof(qp_params->sgid));
1112 		memcpy(&qp_params->dgid.bytes[0],
1113 		       &grh->dgid,
1114 		       sizeof(qp_params->dgid));
1115 		qp_params->roce_mode = ROCE_V1;
1116 		break;
1117 	case RDMA_NETWORK_IPV4:
1118 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1119 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1120 		ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1121 		qp_params->sgid.ipv4_addr = ipv4_addr;
1122 		ipv4_addr =
1123 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1124 		qp_params->dgid.ipv4_addr = ipv4_addr;
1125 		SET_FIELD(qp_params->modify_flags,
1126 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1127 		qp_params->roce_mode = ROCE_V2_IPV4;
1128 		break;
1129 	}
1130 
1131 	for (i = 0; i < 4; i++) {
1132 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1133 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1134 	}
1135 
1136 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1137 		qp_params->vlan_id = 0;
1138 
1139 	return 0;
1140 }
1141 
1142 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1143 			       struct ib_qp_init_attr *attrs)
1144 {
1145 	struct qedr_device_attr *qattr = &dev->attr;
1146 
1147 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1148 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1149 		DP_DEBUG(dev, QEDR_MSG_QP,
1150 			 "create qp: unsupported qp type=0x%x requested\n",
1151 			 attrs->qp_type);
1152 		return -EINVAL;
1153 	}
1154 
1155 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1156 		DP_ERR(dev,
1157 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1158 		       attrs->cap.max_send_wr, qattr->max_sqe);
1159 		return -EINVAL;
1160 	}
1161 
1162 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1163 		DP_ERR(dev,
1164 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1165 		       attrs->cap.max_inline_data, qattr->max_inline);
1166 		return -EINVAL;
1167 	}
1168 
1169 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1170 		DP_ERR(dev,
1171 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1172 		       attrs->cap.max_send_sge, qattr->max_sge);
1173 		return -EINVAL;
1174 	}
1175 
1176 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1177 		DP_ERR(dev,
1178 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1179 		       attrs->cap.max_recv_sge, qattr->max_sge);
1180 		return -EINVAL;
1181 	}
1182 
1183 	/* Unprivileged user space cannot create special QP */
1184 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1185 		DP_ERR(dev,
1186 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1187 		       attrs->qp_type);
1188 		return -EINVAL;
1189 	}
1190 
1191 	return 0;
1192 }
1193 
1194 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1195 			       struct qedr_create_qp_uresp *uresp,
1196 			       struct qedr_qp *qp)
1197 {
1198 	/* iWARP requires two doorbells per RQ. */
1199 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1200 		uresp->rq_db_offset =
1201 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1202 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1203 	} else {
1204 		uresp->rq_db_offset =
1205 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1206 	}
1207 
1208 	uresp->rq_icid = qp->icid;
1209 }
1210 
1211 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1212 			       struct qedr_create_qp_uresp *uresp,
1213 			       struct qedr_qp *qp)
1214 {
1215 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1216 
1217 	/* iWARP uses the same cid for rq and sq */
1218 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1219 		uresp->sq_icid = qp->icid;
1220 	else
1221 		uresp->sq_icid = qp->icid + 1;
1222 }
1223 
1224 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1225 			      struct qedr_qp *qp, struct ib_udata *udata)
1226 {
1227 	struct qedr_create_qp_uresp uresp;
1228 	int rc;
1229 
1230 	memset(&uresp, 0, sizeof(uresp));
1231 	qedr_copy_sq_uresp(dev, &uresp, qp);
1232 	qedr_copy_rq_uresp(dev, &uresp, qp);
1233 
1234 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1235 	uresp.qp_id = qp->qp_id;
1236 
1237 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1238 	if (rc)
1239 		DP_ERR(dev,
1240 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1241 		       qp->icid);
1242 
1243 	return rc;
1244 }
1245 
1246 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1247 				      struct qedr_qp *qp,
1248 				      struct qedr_pd *pd,
1249 				      struct ib_qp_init_attr *attrs)
1250 {
1251 	spin_lock_init(&qp->q_lock);
1252 	atomic_set(&qp->refcnt, 1);
1253 	qp->pd = pd;
1254 	qp->qp_type = attrs->qp_type;
1255 	qp->max_inline_data = attrs->cap.max_inline_data;
1256 	qp->sq.max_sges = attrs->cap.max_send_sge;
1257 	qp->state = QED_ROCE_QP_STATE_RESET;
1258 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1259 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1260 	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1261 	qp->dev = dev;
1262 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1263 
1264 	DP_DEBUG(dev, QEDR_MSG_QP,
1265 		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1266 		 qp->rq.max_sges, qp->rq_cq->icid);
1267 	DP_DEBUG(dev, QEDR_MSG_QP,
1268 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1269 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1270 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1271 	DP_DEBUG(dev, QEDR_MSG_QP,
1272 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1273 		 qp->sq.max_sges, qp->sq_cq->icid);
1274 }
1275 
1276 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1277 {
1278 	qp->sq.db = dev->db_addr +
1279 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1280 	qp->sq.db_data.data.icid = qp->icid + 1;
1281 	qp->rq.db = dev->db_addr +
1282 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1283 	qp->rq.db_data.data.icid = qp->icid;
1284 }
1285 
1286 static inline void
1287 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1288 			      struct qedr_pd *pd,
1289 			      struct qedr_qp *qp,
1290 			      struct ib_qp_init_attr *attrs,
1291 			      bool fmr_and_reserved_lkey,
1292 			      struct qed_rdma_create_qp_in_params *params)
1293 {
1294 	/* QP handle to be written in an async event */
1295 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1296 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1297 
1298 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1299 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1300 	params->pd = pd->pd_id;
1301 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1302 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1303 	params->stats_queue = 0;
1304 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1305 	params->srq_id = 0;
1306 	params->use_srq = false;
1307 }
1308 
1309 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1310 {
1311 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1312 		 "qp=%p. "
1313 		 "sq_addr=0x%llx, "
1314 		 "sq_len=%zd, "
1315 		 "rq_addr=0x%llx, "
1316 		 "rq_len=%zd"
1317 		 "\n",
1318 		 qp,
1319 		 qp->usq.buf_addr,
1320 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1321 }
1322 
1323 static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
1324 {
1325 	int rc;
1326 
1327 	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
1328 		return 0;
1329 
1330 	idr_preload(GFP_KERNEL);
1331 	spin_lock_irq(&dev->idr_lock);
1332 
1333 	rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);
1334 
1335 	spin_unlock_irq(&dev->idr_lock);
1336 	idr_preload_end();
1337 
1338 	return rc < 0 ? rc : 0;
1339 }
1340 
1341 static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
1342 {
1343 	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
1344 		return;
1345 
1346 	spin_lock_irq(&dev->idr_lock);
1347 	idr_remove(&dev->qpidr, id);
1348 	spin_unlock_irq(&dev->idr_lock);
1349 }
1350 
1351 static inline void
1352 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1353 			    struct qedr_qp *qp,
1354 			    struct qed_rdma_create_qp_out_params *out_params)
1355 {
1356 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1357 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1358 
1359 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1360 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1361 
1362 	qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1363 	qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1364 
1365 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1366 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1367 }
1368 
1369 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1370 {
1371 	if (qp->usq.umem)
1372 		ib_umem_release(qp->usq.umem);
1373 	qp->usq.umem = NULL;
1374 
1375 	if (qp->urq.umem)
1376 		ib_umem_release(qp->urq.umem);
1377 	qp->urq.umem = NULL;
1378 }
1379 
1380 static int qedr_create_user_qp(struct qedr_dev *dev,
1381 			       struct qedr_qp *qp,
1382 			       struct ib_pd *ibpd,
1383 			       struct ib_udata *udata,
1384 			       struct ib_qp_init_attr *attrs)
1385 {
1386 	struct qed_rdma_create_qp_in_params in_params;
1387 	struct qed_rdma_create_qp_out_params out_params;
1388 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1389 	struct ib_ucontext *ib_ctx = NULL;
1390 	struct qedr_create_qp_ureq ureq;
1391 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1392 	int rc = -EINVAL;
1393 
1394 	ib_ctx = ibpd->uobject->context;
1395 
1396 	memset(&ureq, 0, sizeof(ureq));
1397 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1398 	if (rc) {
1399 		DP_ERR(dev, "Problem copying data from user space\n");
1400 		return rc;
1401 	}
1402 
1403 	/* SQ - read access only (0), dma sync not required (0) */
1404 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1405 				  ureq.sq_len, 0, 0, alloc_and_init);
1406 	if (rc)
1407 		return rc;
1408 
1409 	/* RQ - read access only (0), dma sync not required (0) */
1410 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1411 				  ureq.rq_len, 0, 0, alloc_and_init);
1412 	if (rc)
1413 		return rc;
1414 
1415 	memset(&in_params, 0, sizeof(in_params));
1416 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1417 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1418 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1419 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1420 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1421 	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1422 	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1423 
1424 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1425 					      &in_params, &out_params);
1426 
1427 	if (!qp->qed_qp) {
1428 		rc = -ENOMEM;
1429 		goto err1;
1430 	}
1431 
1432 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1433 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1434 
1435 	qp->qp_id = out_params.qp_id;
1436 	qp->icid = out_params.icid;
1437 
1438 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1439 	if (rc)
1440 		goto err;
1441 
1442 	qedr_qp_user_print(dev, qp);
1443 
1444 	return 0;
1445 err:
1446 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1447 	if (rc)
1448 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1449 
1450 err1:
1451 	qedr_cleanup_user(dev, qp);
1452 	return rc;
1453 }
1454 
1455 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1456 {
1457 	qp->sq.db = dev->db_addr +
1458 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1459 	qp->sq.db_data.data.icid = qp->icid;
1460 
1461 	qp->rq.db = dev->db_addr +
1462 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1463 	qp->rq.db_data.data.icid = qp->icid;
1464 	qp->rq.iwarp_db2 = dev->db_addr +
1465 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1466 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1467 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1468 }
1469 
1470 static int
1471 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1472 			   struct qedr_qp *qp,
1473 			   struct qed_rdma_create_qp_in_params *in_params,
1474 			   u32 n_sq_elems, u32 n_rq_elems)
1475 {
1476 	struct qed_rdma_create_qp_out_params out_params;
1477 	int rc;
1478 
1479 	rc = dev->ops->common->chain_alloc(dev->cdev,
1480 					   QED_CHAIN_USE_TO_PRODUCE,
1481 					   QED_CHAIN_MODE_PBL,
1482 					   QED_CHAIN_CNT_TYPE_U32,
1483 					   n_sq_elems,
1484 					   QEDR_SQE_ELEMENT_SIZE,
1485 					   &qp->sq.pbl, NULL);
1486 
1487 	if (rc)
1488 		return rc;
1489 
1490 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1491 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1492 
1493 	rc = dev->ops->common->chain_alloc(dev->cdev,
1494 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1495 					   QED_CHAIN_MODE_PBL,
1496 					   QED_CHAIN_CNT_TYPE_U32,
1497 					   n_rq_elems,
1498 					   QEDR_RQE_ELEMENT_SIZE,
1499 					   &qp->rq.pbl, NULL);
1500 	if (rc)
1501 		return rc;
1502 
1503 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1504 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1505 
1506 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1507 					      in_params, &out_params);
1508 
1509 	if (!qp->qed_qp)
1510 		return -EINVAL;
1511 
1512 	qp->qp_id = out_params.qp_id;
1513 	qp->icid = out_params.icid;
1514 
1515 	qedr_set_roce_db_info(dev, qp);
1516 	return rc;
1517 }
1518 
1519 static int
1520 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1521 			    struct qedr_qp *qp,
1522 			    struct qed_rdma_create_qp_in_params *in_params,
1523 			    u32 n_sq_elems, u32 n_rq_elems)
1524 {
1525 	struct qed_rdma_create_qp_out_params out_params;
1526 	struct qed_chain_ext_pbl ext_pbl;
1527 	int rc;
1528 
1529 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1530 						     QEDR_SQE_ELEMENT_SIZE,
1531 						     QED_CHAIN_MODE_PBL);
1532 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1533 						     QEDR_RQE_ELEMENT_SIZE,
1534 						     QED_CHAIN_MODE_PBL);
1535 
1536 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1537 					      in_params, &out_params);
1538 
1539 	if (!qp->qed_qp)
1540 		return -EINVAL;
1541 
1542 	/* Now we allocate the chain */
1543 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1544 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1545 
1546 	rc = dev->ops->common->chain_alloc(dev->cdev,
1547 					   QED_CHAIN_USE_TO_PRODUCE,
1548 					   QED_CHAIN_MODE_PBL,
1549 					   QED_CHAIN_CNT_TYPE_U32,
1550 					   n_sq_elems,
1551 					   QEDR_SQE_ELEMENT_SIZE,
1552 					   &qp->sq.pbl, &ext_pbl);
1553 
1554 	if (rc)
1555 		goto err;
1556 
1557 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1558 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1559 
1560 	rc = dev->ops->common->chain_alloc(dev->cdev,
1561 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1562 					   QED_CHAIN_MODE_PBL,
1563 					   QED_CHAIN_CNT_TYPE_U32,
1564 					   n_rq_elems,
1565 					   QEDR_RQE_ELEMENT_SIZE,
1566 					   &qp->rq.pbl, &ext_pbl);
1567 
1568 	if (rc)
1569 		goto err;
1570 
1571 	qp->qp_id = out_params.qp_id;
1572 	qp->icid = out_params.icid;
1573 
1574 	qedr_set_iwarp_db_info(dev, qp);
1575 	return rc;
1576 
1577 err:
1578 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1579 
1580 	return rc;
1581 }
1582 
1583 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1584 {
1585 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1586 	kfree(qp->wqe_wr_id);
1587 
1588 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1589 	kfree(qp->rqe_wr_id);
1590 }
1591 
1592 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1593 				 struct qedr_qp *qp,
1594 				 struct ib_pd *ibpd,
1595 				 struct ib_qp_init_attr *attrs)
1596 {
1597 	struct qed_rdma_create_qp_in_params in_params;
1598 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1599 	int rc = -EINVAL;
1600 	u32 n_rq_elems;
1601 	u32 n_sq_elems;
1602 	u32 n_sq_entries;
1603 
1604 	memset(&in_params, 0, sizeof(in_params));
1605 
1606 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1607 	 * the ring. The ring should allow at least a single WR, even if the
1608 	 * user requested none, due to allocation issues.
1609 	 * We should add an extra WR since the prod and cons indices of
1610 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1611 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1612 	 * double the number of entries due an iSER issue that pushes far more
1613 	 * WRs than indicated. If we decline its ib_post_send() then we get
1614 	 * error prints in the dmesg we'd like to avoid.
1615 	 */
1616 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1617 			      dev->attr.max_sqe);
1618 
1619 	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
1620 				GFP_KERNEL);
1621 	if (!qp->wqe_wr_id) {
1622 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1623 		return -ENOMEM;
1624 	}
1625 
1626 	/* QP handle to be written in CQE */
1627 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1628 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1629 
1630 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1631 	 * the ring. There ring should allow at least a single WR, even if the
1632 	 * user requested none, due to allocation issues.
1633 	 */
1634 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1635 
1636 	/* Allocate driver internal RQ array */
1637 	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
1638 				GFP_KERNEL);
1639 	if (!qp->rqe_wr_id) {
1640 		DP_ERR(dev,
1641 		       "create qp: failed RQ shadow memory allocation\n");
1642 		kfree(qp->wqe_wr_id);
1643 		return -ENOMEM;
1644 	}
1645 
1646 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1647 
1648 	n_sq_entries = attrs->cap.max_send_wr;
1649 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1650 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1651 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1652 
1653 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1654 
1655 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1656 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1657 						 n_sq_elems, n_rq_elems);
1658 	else
1659 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1660 						n_sq_elems, n_rq_elems);
1661 	if (rc)
1662 		qedr_cleanup_kernel(dev, qp);
1663 
1664 	return rc;
1665 }
1666 
1667 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1668 			     struct ib_qp_init_attr *attrs,
1669 			     struct ib_udata *udata)
1670 {
1671 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1672 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1673 	struct qedr_qp *qp;
1674 	struct ib_qp *ibqp;
1675 	int rc = 0;
1676 
1677 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1678 		 udata ? "user library" : "kernel", pd);
1679 
1680 	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1681 	if (rc)
1682 		return ERR_PTR(rc);
1683 
1684 	if (attrs->srq)
1685 		return ERR_PTR(-EINVAL);
1686 
1687 	DP_DEBUG(dev, QEDR_MSG_QP,
1688 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1689 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1690 		 get_qedr_cq(attrs->send_cq),
1691 		 get_qedr_cq(attrs->send_cq)->icid,
1692 		 get_qedr_cq(attrs->recv_cq),
1693 		 get_qedr_cq(attrs->recv_cq)->icid);
1694 
1695 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1696 	if (!qp) {
1697 		DP_ERR(dev, "create qp: failed allocating memory\n");
1698 		return ERR_PTR(-ENOMEM);
1699 	}
1700 
1701 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1702 
1703 	if (attrs->qp_type == IB_QPT_GSI) {
1704 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1705 		if (IS_ERR(ibqp))
1706 			kfree(qp);
1707 		return ibqp;
1708 	}
1709 
1710 	if (udata)
1711 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1712 	else
1713 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1714 
1715 	if (rc)
1716 		goto err;
1717 
1718 	qp->ibqp.qp_num = qp->qp_id;
1719 
1720 	rc = qedr_idr_add(dev, qp, qp->qp_id);
1721 	if (rc)
1722 		goto err;
1723 
1724 	return &qp->ibqp;
1725 
1726 err:
1727 	kfree(qp);
1728 
1729 	return ERR_PTR(-EFAULT);
1730 }
1731 
1732 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1733 {
1734 	switch (qp_state) {
1735 	case QED_ROCE_QP_STATE_RESET:
1736 		return IB_QPS_RESET;
1737 	case QED_ROCE_QP_STATE_INIT:
1738 		return IB_QPS_INIT;
1739 	case QED_ROCE_QP_STATE_RTR:
1740 		return IB_QPS_RTR;
1741 	case QED_ROCE_QP_STATE_RTS:
1742 		return IB_QPS_RTS;
1743 	case QED_ROCE_QP_STATE_SQD:
1744 		return IB_QPS_SQD;
1745 	case QED_ROCE_QP_STATE_ERR:
1746 		return IB_QPS_ERR;
1747 	case QED_ROCE_QP_STATE_SQE:
1748 		return IB_QPS_SQE;
1749 	}
1750 	return IB_QPS_ERR;
1751 }
1752 
1753 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1754 					enum ib_qp_state qp_state)
1755 {
1756 	switch (qp_state) {
1757 	case IB_QPS_RESET:
1758 		return QED_ROCE_QP_STATE_RESET;
1759 	case IB_QPS_INIT:
1760 		return QED_ROCE_QP_STATE_INIT;
1761 	case IB_QPS_RTR:
1762 		return QED_ROCE_QP_STATE_RTR;
1763 	case IB_QPS_RTS:
1764 		return QED_ROCE_QP_STATE_RTS;
1765 	case IB_QPS_SQD:
1766 		return QED_ROCE_QP_STATE_SQD;
1767 	case IB_QPS_ERR:
1768 		return QED_ROCE_QP_STATE_ERR;
1769 	default:
1770 		return QED_ROCE_QP_STATE_ERR;
1771 	}
1772 }
1773 
1774 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1775 {
1776 	qed_chain_reset(&qph->pbl);
1777 	qph->prod = 0;
1778 	qph->cons = 0;
1779 	qph->wqe_cons = 0;
1780 	qph->db_data.data.value = cpu_to_le16(0);
1781 }
1782 
1783 static int qedr_update_qp_state(struct qedr_dev *dev,
1784 				struct qedr_qp *qp,
1785 				enum qed_roce_qp_state cur_state,
1786 				enum qed_roce_qp_state new_state)
1787 {
1788 	int status = 0;
1789 
1790 	if (new_state == cur_state)
1791 		return 0;
1792 
1793 	switch (cur_state) {
1794 	case QED_ROCE_QP_STATE_RESET:
1795 		switch (new_state) {
1796 		case QED_ROCE_QP_STATE_INIT:
1797 			qp->prev_wqe_size = 0;
1798 			qedr_reset_qp_hwq_info(&qp->sq);
1799 			qedr_reset_qp_hwq_info(&qp->rq);
1800 			break;
1801 		default:
1802 			status = -EINVAL;
1803 			break;
1804 		};
1805 		break;
1806 	case QED_ROCE_QP_STATE_INIT:
1807 		switch (new_state) {
1808 		case QED_ROCE_QP_STATE_RTR:
1809 			/* Update doorbell (in case post_recv was
1810 			 * done before move to RTR)
1811 			 */
1812 
1813 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
1814 				writel(qp->rq.db_data.raw, qp->rq.db);
1815 				/* Make sure write takes effect */
1816 				mmiowb();
1817 			}
1818 			break;
1819 		case QED_ROCE_QP_STATE_ERR:
1820 			break;
1821 		default:
1822 			/* Invalid state change. */
1823 			status = -EINVAL;
1824 			break;
1825 		};
1826 		break;
1827 	case QED_ROCE_QP_STATE_RTR:
1828 		/* RTR->XXX */
1829 		switch (new_state) {
1830 		case QED_ROCE_QP_STATE_RTS:
1831 			break;
1832 		case QED_ROCE_QP_STATE_ERR:
1833 			break;
1834 		default:
1835 			/* Invalid state change. */
1836 			status = -EINVAL;
1837 			break;
1838 		};
1839 		break;
1840 	case QED_ROCE_QP_STATE_RTS:
1841 		/* RTS->XXX */
1842 		switch (new_state) {
1843 		case QED_ROCE_QP_STATE_SQD:
1844 			break;
1845 		case QED_ROCE_QP_STATE_ERR:
1846 			break;
1847 		default:
1848 			/* Invalid state change. */
1849 			status = -EINVAL;
1850 			break;
1851 		};
1852 		break;
1853 	case QED_ROCE_QP_STATE_SQD:
1854 		/* SQD->XXX */
1855 		switch (new_state) {
1856 		case QED_ROCE_QP_STATE_RTS:
1857 		case QED_ROCE_QP_STATE_ERR:
1858 			break;
1859 		default:
1860 			/* Invalid state change. */
1861 			status = -EINVAL;
1862 			break;
1863 		};
1864 		break;
1865 	case QED_ROCE_QP_STATE_ERR:
1866 		/* ERR->XXX */
1867 		switch (new_state) {
1868 		case QED_ROCE_QP_STATE_RESET:
1869 			if ((qp->rq.prod != qp->rq.cons) ||
1870 			    (qp->sq.prod != qp->sq.cons)) {
1871 				DP_NOTICE(dev,
1872 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1873 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1874 					  qp->sq.cons);
1875 				status = -EINVAL;
1876 			}
1877 			break;
1878 		default:
1879 			status = -EINVAL;
1880 			break;
1881 		};
1882 		break;
1883 	default:
1884 		status = -EINVAL;
1885 		break;
1886 	};
1887 
1888 	return status;
1889 }
1890 
1891 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1892 		   int attr_mask, struct ib_udata *udata)
1893 {
1894 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1895 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1896 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1897 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1898 	enum ib_qp_state old_qp_state, new_qp_state;
1899 	enum qed_roce_qp_state cur_state;
1900 	int rc = 0;
1901 
1902 	DP_DEBUG(dev, QEDR_MSG_QP,
1903 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
1904 		 attr->qp_state);
1905 
1906 	old_qp_state = qedr_get_ibqp_state(qp->state);
1907 	if (attr_mask & IB_QP_STATE)
1908 		new_qp_state = attr->qp_state;
1909 	else
1910 		new_qp_state = old_qp_state;
1911 
1912 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1913 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
1914 					ibqp->qp_type, attr_mask,
1915 					IB_LINK_LAYER_ETHERNET)) {
1916 			DP_ERR(dev,
1917 			       "modify qp: invalid attribute mask=0x%x specified for\n"
1918 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1919 			       attr_mask, qp->qp_id, ibqp->qp_type,
1920 			       old_qp_state, new_qp_state);
1921 			rc = -EINVAL;
1922 			goto err;
1923 		}
1924 	}
1925 
1926 	/* Translate the masks... */
1927 	if (attr_mask & IB_QP_STATE) {
1928 		SET_FIELD(qp_params.modify_flags,
1929 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1930 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1931 	}
1932 
1933 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1934 		qp_params.sqd_async = true;
1935 
1936 	if (attr_mask & IB_QP_PKEY_INDEX) {
1937 		SET_FIELD(qp_params.modify_flags,
1938 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1939 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1940 			rc = -EINVAL;
1941 			goto err;
1942 		}
1943 
1944 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1945 	}
1946 
1947 	if (attr_mask & IB_QP_QKEY)
1948 		qp->qkey = attr->qkey;
1949 
1950 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1951 		SET_FIELD(qp_params.modify_flags,
1952 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1953 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1954 						  IB_ACCESS_REMOTE_READ;
1955 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1956 						   IB_ACCESS_REMOTE_WRITE;
1957 		qp_params.incoming_atomic_en = attr->qp_access_flags &
1958 					       IB_ACCESS_REMOTE_ATOMIC;
1959 	}
1960 
1961 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1962 		if (attr_mask & IB_QP_PATH_MTU) {
1963 			if (attr->path_mtu < IB_MTU_256 ||
1964 			    attr->path_mtu > IB_MTU_4096) {
1965 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1966 				rc = -EINVAL;
1967 				goto err;
1968 			}
1969 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1970 				      ib_mtu_enum_to_int(iboe_get_mtu
1971 							 (dev->ndev->mtu)));
1972 		}
1973 
1974 		if (!qp->mtu) {
1975 			qp->mtu =
1976 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1977 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1978 		}
1979 
1980 		SET_FIELD(qp_params.modify_flags,
1981 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1982 
1983 		qp_params.traffic_class_tos = grh->traffic_class;
1984 		qp_params.flow_label = grh->flow_label;
1985 		qp_params.hop_limit_ttl = grh->hop_limit;
1986 
1987 		qp->sgid_idx = grh->sgid_index;
1988 
1989 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1990 		if (rc) {
1991 			DP_ERR(dev,
1992 			       "modify qp: problems with GID index %d (rc=%d)\n",
1993 			       grh->sgid_index, rc);
1994 			return rc;
1995 		}
1996 
1997 		rc = qedr_get_dmac(dev, &attr->ah_attr,
1998 				   qp_params.remote_mac_addr);
1999 		if (rc)
2000 			return rc;
2001 
2002 		qp_params.use_local_mac = true;
2003 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2004 
2005 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2006 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2007 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2008 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2009 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2010 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2011 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2012 			 qp_params.remote_mac_addr);
2013 
2014 		qp_params.mtu = qp->mtu;
2015 		qp_params.lb_indication = false;
2016 	}
2017 
2018 	if (!qp_params.mtu) {
2019 		/* Stay with current MTU */
2020 		if (qp->mtu)
2021 			qp_params.mtu = qp->mtu;
2022 		else
2023 			qp_params.mtu =
2024 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2025 	}
2026 
2027 	if (attr_mask & IB_QP_TIMEOUT) {
2028 		SET_FIELD(qp_params.modify_flags,
2029 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2030 
2031 		/* The received timeout value is an exponent used like this:
2032 		 *    "12.7.34 LOCAL ACK TIMEOUT
2033 		 *    Value representing the transport (ACK) timeout for use by
2034 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2035 		 * The FW expects timeout in msec so we need to divide the usec
2036 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2037 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2038 		 * The value of zero means infinite so we use a 'max_t' to make
2039 		 * sure that sub 1 msec values will be configured as 1 msec.
2040 		 */
2041 		if (attr->timeout)
2042 			qp_params.ack_timeout =
2043 					1 << max_t(int, attr->timeout - 8, 0);
2044 		else
2045 			qp_params.ack_timeout = 0;
2046 	}
2047 
2048 	if (attr_mask & IB_QP_RETRY_CNT) {
2049 		SET_FIELD(qp_params.modify_flags,
2050 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2051 		qp_params.retry_cnt = attr->retry_cnt;
2052 	}
2053 
2054 	if (attr_mask & IB_QP_RNR_RETRY) {
2055 		SET_FIELD(qp_params.modify_flags,
2056 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2057 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2058 	}
2059 
2060 	if (attr_mask & IB_QP_RQ_PSN) {
2061 		SET_FIELD(qp_params.modify_flags,
2062 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2063 		qp_params.rq_psn = attr->rq_psn;
2064 		qp->rq_psn = attr->rq_psn;
2065 	}
2066 
2067 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2068 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2069 			rc = -EINVAL;
2070 			DP_ERR(dev,
2071 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2072 			       attr->max_rd_atomic,
2073 			       dev->attr.max_qp_req_rd_atomic_resc);
2074 			goto err;
2075 		}
2076 
2077 		SET_FIELD(qp_params.modify_flags,
2078 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2079 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2080 	}
2081 
2082 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2083 		SET_FIELD(qp_params.modify_flags,
2084 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2085 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2086 	}
2087 
2088 	if (attr_mask & IB_QP_SQ_PSN) {
2089 		SET_FIELD(qp_params.modify_flags,
2090 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2091 		qp_params.sq_psn = attr->sq_psn;
2092 		qp->sq_psn = attr->sq_psn;
2093 	}
2094 
2095 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2096 		if (attr->max_dest_rd_atomic >
2097 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2098 			DP_ERR(dev,
2099 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2100 			       attr->max_dest_rd_atomic,
2101 			       dev->attr.max_qp_resp_rd_atomic_resc);
2102 
2103 			rc = -EINVAL;
2104 			goto err;
2105 		}
2106 
2107 		SET_FIELD(qp_params.modify_flags,
2108 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2109 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2110 	}
2111 
2112 	if (attr_mask & IB_QP_DEST_QPN) {
2113 		SET_FIELD(qp_params.modify_flags,
2114 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2115 
2116 		qp_params.dest_qp = attr->dest_qp_num;
2117 		qp->dest_qp_num = attr->dest_qp_num;
2118 	}
2119 
2120 	cur_state = qp->state;
2121 
2122 	/* Update the QP state before the actual ramrod to prevent a race with
2123 	 * fast path. Modifying the QP state to error will cause the device to
2124 	 * flush the CQEs and while polling the flushed CQEs will considered as
2125 	 * a potential issue if the QP isn't in error state.
2126 	 */
2127 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2128 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2129 		qp->state = QED_ROCE_QP_STATE_ERR;
2130 
2131 	if (qp->qp_type != IB_QPT_GSI)
2132 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2133 					      qp->qed_qp, &qp_params);
2134 
2135 	if (attr_mask & IB_QP_STATE) {
2136 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2137 			rc = qedr_update_qp_state(dev, qp, cur_state,
2138 						  qp_params.new_state);
2139 		qp->state = qp_params.new_state;
2140 	}
2141 
2142 err:
2143 	return rc;
2144 }
2145 
2146 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2147 {
2148 	int ib_qp_acc_flags = 0;
2149 
2150 	if (params->incoming_rdma_write_en)
2151 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2152 	if (params->incoming_rdma_read_en)
2153 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2154 	if (params->incoming_atomic_en)
2155 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2156 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2157 	return ib_qp_acc_flags;
2158 }
2159 
2160 int qedr_query_qp(struct ib_qp *ibqp,
2161 		  struct ib_qp_attr *qp_attr,
2162 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2163 {
2164 	struct qed_rdma_query_qp_out_params params;
2165 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2166 	struct qedr_dev *dev = qp->dev;
2167 	int rc = 0;
2168 
2169 	memset(&params, 0, sizeof(params));
2170 
2171 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2172 	if (rc)
2173 		goto err;
2174 
2175 	memset(qp_attr, 0, sizeof(*qp_attr));
2176 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2177 
2178 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2179 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2180 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2181 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2182 	qp_attr->rq_psn = params.rq_psn;
2183 	qp_attr->sq_psn = params.sq_psn;
2184 	qp_attr->dest_qp_num = params.dest_qp;
2185 
2186 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2187 
2188 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2189 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2190 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2191 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2192 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2193 	qp_init_attr->cap = qp_attr->cap;
2194 
2195 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2196 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2197 			params.flow_label, qp->sgid_idx,
2198 			params.hop_limit_ttl, params.traffic_class_tos);
2199 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2200 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2201 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2202 	qp_attr->timeout = params.timeout;
2203 	qp_attr->rnr_retry = params.rnr_retry;
2204 	qp_attr->retry_cnt = params.retry_cnt;
2205 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2206 	qp_attr->pkey_index = params.pkey_index;
2207 	qp_attr->port_num = 1;
2208 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2209 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2210 	qp_attr->alt_pkey_index = 0;
2211 	qp_attr->alt_port_num = 0;
2212 	qp_attr->alt_timeout = 0;
2213 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2214 
2215 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2216 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2217 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2218 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2219 
2220 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2221 		 qp_attr->cap.max_inline_data);
2222 
2223 err:
2224 	return rc;
2225 }
2226 
2227 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2228 {
2229 	int rc = 0;
2230 
2231 	if (qp->qp_type != IB_QPT_GSI) {
2232 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2233 		if (rc)
2234 			return rc;
2235 	}
2236 
2237 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2238 		qedr_cleanup_user(dev, qp);
2239 	else
2240 		qedr_cleanup_kernel(dev, qp);
2241 
2242 	return 0;
2243 }
2244 
2245 int qedr_destroy_qp(struct ib_qp *ibqp)
2246 {
2247 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2248 	struct qedr_dev *dev = qp->dev;
2249 	struct ib_qp_attr attr;
2250 	int attr_mask = 0;
2251 	int rc = 0;
2252 
2253 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2254 		 qp, qp->qp_type);
2255 
2256 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2257 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2258 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2259 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2260 
2261 			attr.qp_state = IB_QPS_ERR;
2262 			attr_mask |= IB_QP_STATE;
2263 
2264 			/* Change the QP state to ERROR */
2265 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2266 		}
2267 	} else {
2268 		/* Wait for the connect/accept to complete */
2269 		if (qp->ep) {
2270 			int wait_count = 1;
2271 
2272 			while (qp->ep->during_connect) {
2273 				DP_DEBUG(dev, QEDR_MSG_QP,
2274 					 "Still in during connect/accept\n");
2275 
2276 				msleep(100);
2277 				if (wait_count++ > 200) {
2278 					DP_NOTICE(dev,
2279 						  "during connect timeout\n");
2280 					break;
2281 				}
2282 			}
2283 		}
2284 	}
2285 
2286 	if (qp->qp_type == IB_QPT_GSI)
2287 		qedr_destroy_gsi_qp(dev);
2288 
2289 	qedr_free_qp_resources(dev, qp);
2290 
2291 	if (atomic_dec_and_test(&qp->refcnt)) {
2292 		qedr_idr_remove(dev, qp->qp_id);
2293 		kfree(qp);
2294 	}
2295 	return rc;
2296 }
2297 
2298 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2299 			     struct ib_udata *udata)
2300 {
2301 	struct qedr_ah *ah;
2302 
2303 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2304 	if (!ah)
2305 		return ERR_PTR(-ENOMEM);
2306 
2307 	ah->attr = *attr;
2308 
2309 	return &ah->ibah;
2310 }
2311 
2312 int qedr_destroy_ah(struct ib_ah *ibah)
2313 {
2314 	struct qedr_ah *ah = get_qedr_ah(ibah);
2315 
2316 	kfree(ah);
2317 	return 0;
2318 }
2319 
2320 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2321 {
2322 	struct qedr_pbl *pbl, *tmp;
2323 
2324 	if (info->pbl_table)
2325 		list_add_tail(&info->pbl_table->list_entry,
2326 			      &info->free_pbl_list);
2327 
2328 	if (!list_empty(&info->inuse_pbl_list))
2329 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2330 
2331 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2332 		list_del(&pbl->list_entry);
2333 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2334 	}
2335 }
2336 
2337 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2338 			size_t page_list_len, bool two_layered)
2339 {
2340 	struct qedr_pbl *tmp;
2341 	int rc;
2342 
2343 	INIT_LIST_HEAD(&info->free_pbl_list);
2344 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2345 
2346 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2347 				  page_list_len, two_layered);
2348 	if (rc)
2349 		goto done;
2350 
2351 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2352 	if (IS_ERR(info->pbl_table)) {
2353 		rc = PTR_ERR(info->pbl_table);
2354 		goto done;
2355 	}
2356 
2357 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2358 		 &info->pbl_table->pa);
2359 
2360 	/* in usual case we use 2 PBLs, so we add one to free
2361 	 * list and allocating another one
2362 	 */
2363 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2364 	if (IS_ERR(tmp)) {
2365 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2366 		goto done;
2367 	}
2368 
2369 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2370 
2371 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2372 
2373 done:
2374 	if (rc)
2375 		free_mr_info(dev, info);
2376 
2377 	return rc;
2378 }
2379 
2380 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2381 			       u64 usr_addr, int acc, struct ib_udata *udata)
2382 {
2383 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2384 	struct qedr_mr *mr;
2385 	struct qedr_pd *pd;
2386 	int rc = -ENOMEM;
2387 
2388 	pd = get_qedr_pd(ibpd);
2389 	DP_DEBUG(dev, QEDR_MSG_MR,
2390 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2391 		 pd->pd_id, start, len, usr_addr, acc);
2392 
2393 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2394 		return ERR_PTR(-EINVAL);
2395 
2396 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2397 	if (!mr)
2398 		return ERR_PTR(rc);
2399 
2400 	mr->type = QEDR_MR_USER;
2401 
2402 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2403 	if (IS_ERR(mr->umem)) {
2404 		rc = -EFAULT;
2405 		goto err0;
2406 	}
2407 
2408 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2409 	if (rc)
2410 		goto err1;
2411 
2412 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2413 			   &mr->info.pbl_info, mr->umem->page_shift);
2414 
2415 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2416 	if (rc) {
2417 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2418 		goto err1;
2419 	}
2420 
2421 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2422 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2423 	mr->hw_mr.key = 0;
2424 	mr->hw_mr.pd = pd->pd_id;
2425 	mr->hw_mr.local_read = 1;
2426 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2427 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2428 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2429 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2430 	mr->hw_mr.mw_bind = false;
2431 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2432 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2433 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2434 	mr->hw_mr.page_size_log = mr->umem->page_shift;
2435 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2436 	mr->hw_mr.length = len;
2437 	mr->hw_mr.vaddr = usr_addr;
2438 	mr->hw_mr.zbva = false;
2439 	mr->hw_mr.phy_mr = false;
2440 	mr->hw_mr.dma_mr = false;
2441 
2442 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2443 	if (rc) {
2444 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2445 		goto err2;
2446 	}
2447 
2448 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2449 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2450 	    mr->hw_mr.remote_atomic)
2451 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2452 
2453 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2454 		 mr->ibmr.lkey);
2455 	return &mr->ibmr;
2456 
2457 err2:
2458 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2459 err1:
2460 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2461 err0:
2462 	kfree(mr);
2463 	return ERR_PTR(rc);
2464 }
2465 
2466 int qedr_dereg_mr(struct ib_mr *ib_mr)
2467 {
2468 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2469 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2470 	int rc = 0;
2471 
2472 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2473 	if (rc)
2474 		return rc;
2475 
2476 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2477 
2478 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2479 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2480 
2481 	/* it could be user registered memory. */
2482 	if (mr->umem)
2483 		ib_umem_release(mr->umem);
2484 
2485 	kfree(mr);
2486 
2487 	return rc;
2488 }
2489 
2490 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2491 				       int max_page_list_len)
2492 {
2493 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2494 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2495 	struct qedr_mr *mr;
2496 	int rc = -ENOMEM;
2497 
2498 	DP_DEBUG(dev, QEDR_MSG_MR,
2499 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2500 		 max_page_list_len);
2501 
2502 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2503 	if (!mr)
2504 		return ERR_PTR(rc);
2505 
2506 	mr->dev = dev;
2507 	mr->type = QEDR_MR_FRMR;
2508 
2509 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2510 	if (rc)
2511 		goto err0;
2512 
2513 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2514 	if (rc) {
2515 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2516 		goto err0;
2517 	}
2518 
2519 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2520 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2521 	mr->hw_mr.key = 0;
2522 	mr->hw_mr.pd = pd->pd_id;
2523 	mr->hw_mr.local_read = 1;
2524 	mr->hw_mr.local_write = 0;
2525 	mr->hw_mr.remote_read = 0;
2526 	mr->hw_mr.remote_write = 0;
2527 	mr->hw_mr.remote_atomic = 0;
2528 	mr->hw_mr.mw_bind = false;
2529 	mr->hw_mr.pbl_ptr = 0;
2530 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2531 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2532 	mr->hw_mr.fbo = 0;
2533 	mr->hw_mr.length = 0;
2534 	mr->hw_mr.vaddr = 0;
2535 	mr->hw_mr.zbva = false;
2536 	mr->hw_mr.phy_mr = true;
2537 	mr->hw_mr.dma_mr = false;
2538 
2539 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2540 	if (rc) {
2541 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2542 		goto err1;
2543 	}
2544 
2545 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2546 	mr->ibmr.rkey = mr->ibmr.lkey;
2547 
2548 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2549 	return mr;
2550 
2551 err1:
2552 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2553 err0:
2554 	kfree(mr);
2555 	return ERR_PTR(rc);
2556 }
2557 
2558 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2559 			    enum ib_mr_type mr_type, u32 max_num_sg)
2560 {
2561 	struct qedr_mr *mr;
2562 
2563 	if (mr_type != IB_MR_TYPE_MEM_REG)
2564 		return ERR_PTR(-EINVAL);
2565 
2566 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2567 
2568 	if (IS_ERR(mr))
2569 		return ERR_PTR(-EINVAL);
2570 
2571 	return &mr->ibmr;
2572 }
2573 
2574 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2575 {
2576 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2577 	struct qedr_pbl *pbl_table;
2578 	struct regpair *pbe;
2579 	u32 pbes_in_page;
2580 
2581 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2582 		DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
2583 		return -ENOMEM;
2584 	}
2585 
2586 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2587 		 mr->npages, addr);
2588 
2589 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2590 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2591 	pbe = (struct regpair *)pbl_table->va;
2592 	pbe +=  mr->npages % pbes_in_page;
2593 	pbe->lo = cpu_to_le32((u32)addr);
2594 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2595 
2596 	mr->npages++;
2597 
2598 	return 0;
2599 }
2600 
2601 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2602 {
2603 	int work = info->completed - info->completed_handled - 1;
2604 
2605 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2606 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2607 		struct qedr_pbl *pbl;
2608 
2609 		/* Free all the page list that are possible to be freed
2610 		 * (all the ones that were invalidated), under the assumption
2611 		 * that if an FMR was completed successfully that means that
2612 		 * if there was an invalidate operation before it also ended
2613 		 */
2614 		pbl = list_first_entry(&info->inuse_pbl_list,
2615 				       struct qedr_pbl, list_entry);
2616 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2617 		info->completed_handled++;
2618 	}
2619 }
2620 
2621 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2622 		   int sg_nents, unsigned int *sg_offset)
2623 {
2624 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2625 
2626 	mr->npages = 0;
2627 
2628 	handle_completed_mrs(mr->dev, &mr->info);
2629 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2630 }
2631 
2632 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2633 {
2634 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2635 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2636 	struct qedr_mr *mr;
2637 	int rc;
2638 
2639 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2640 	if (!mr)
2641 		return ERR_PTR(-ENOMEM);
2642 
2643 	mr->type = QEDR_MR_DMA;
2644 
2645 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2646 	if (rc) {
2647 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2648 		goto err1;
2649 	}
2650 
2651 	/* index only, 18 bit long, lkey = itid << 8 | key */
2652 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2653 	mr->hw_mr.pd = pd->pd_id;
2654 	mr->hw_mr.local_read = 1;
2655 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2656 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2657 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2658 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2659 	mr->hw_mr.dma_mr = true;
2660 
2661 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2662 	if (rc) {
2663 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2664 		goto err2;
2665 	}
2666 
2667 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2668 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2669 	    mr->hw_mr.remote_atomic)
2670 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2671 
2672 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2673 	return &mr->ibmr;
2674 
2675 err2:
2676 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2677 err1:
2678 	kfree(mr);
2679 	return ERR_PTR(rc);
2680 }
2681 
2682 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2683 {
2684 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2685 }
2686 
2687 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2688 {
2689 	int i, len = 0;
2690 
2691 	for (i = 0; i < num_sge; i++)
2692 		len += sg_list[i].length;
2693 
2694 	return len;
2695 }
2696 
2697 static void swap_wqe_data64(u64 *p)
2698 {
2699 	int i;
2700 
2701 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2702 		*p = cpu_to_be64(cpu_to_le64(*p));
2703 }
2704 
2705 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2706 				       struct qedr_qp *qp, u8 *wqe_size,
2707 				       struct ib_send_wr *wr,
2708 				       struct ib_send_wr **bad_wr, u8 *bits,
2709 				       u8 bit)
2710 {
2711 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2712 	char *seg_prt, *wqe;
2713 	int i, seg_siz;
2714 
2715 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2716 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2717 		*bad_wr = wr;
2718 		return 0;
2719 	}
2720 
2721 	if (!data_size)
2722 		return data_size;
2723 
2724 	*bits |= bit;
2725 
2726 	seg_prt = NULL;
2727 	wqe = NULL;
2728 	seg_siz = 0;
2729 
2730 	/* Copy data inline */
2731 	for (i = 0; i < wr->num_sge; i++) {
2732 		u32 len = wr->sg_list[i].length;
2733 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2734 
2735 		while (len > 0) {
2736 			u32 cur;
2737 
2738 			/* New segment required */
2739 			if (!seg_siz) {
2740 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2741 				seg_prt = wqe;
2742 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2743 				(*wqe_size)++;
2744 			}
2745 
2746 			/* Calculate currently allowed length */
2747 			cur = min_t(u32, len, seg_siz);
2748 			memcpy(seg_prt, src, cur);
2749 
2750 			/* Update segment variables */
2751 			seg_prt += cur;
2752 			seg_siz -= cur;
2753 
2754 			/* Update sge variables */
2755 			src += cur;
2756 			len -= cur;
2757 
2758 			/* Swap fully-completed segments */
2759 			if (!seg_siz)
2760 				swap_wqe_data64((u64 *)wqe);
2761 		}
2762 	}
2763 
2764 	/* swap last not completed segment */
2765 	if (seg_siz)
2766 		swap_wqe_data64((u64 *)wqe);
2767 
2768 	return data_size;
2769 }
2770 
2771 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
2772 	do {							\
2773 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2774 		(sge)->length = cpu_to_le32(vlength);		\
2775 		(sge)->flags = cpu_to_le32(vflags);		\
2776 	} while (0)
2777 
2778 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
2779 	do {							\
2780 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
2781 		(hdr)->num_sges = num_sge;			\
2782 	} while (0)
2783 
2784 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
2785 	do {							\
2786 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2787 		(sge)->length = cpu_to_le32(vlength);		\
2788 		(sge)->l_key = cpu_to_le32(vlkey);		\
2789 	} while (0)
2790 
2791 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2792 				struct ib_send_wr *wr)
2793 {
2794 	u32 data_size = 0;
2795 	int i;
2796 
2797 	for (i = 0; i < wr->num_sge; i++) {
2798 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2799 
2800 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2801 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2802 		sge->length = cpu_to_le32(wr->sg_list[i].length);
2803 		data_size += wr->sg_list[i].length;
2804 	}
2805 
2806 	if (wqe_size)
2807 		*wqe_size += wr->num_sge;
2808 
2809 	return data_size;
2810 }
2811 
2812 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2813 				     struct qedr_qp *qp,
2814 				     struct rdma_sq_rdma_wqe_1st *rwqe,
2815 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2816 				     struct ib_send_wr *wr,
2817 				     struct ib_send_wr **bad_wr)
2818 {
2819 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2820 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2821 
2822 	if (wr->send_flags & IB_SEND_INLINE &&
2823 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2824 	     wr->opcode == IB_WR_RDMA_WRITE)) {
2825 		u8 flags = 0;
2826 
2827 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2828 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2829 						   bad_wr, &rwqe->flags, flags);
2830 	}
2831 
2832 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2833 }
2834 
2835 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2836 				     struct qedr_qp *qp,
2837 				     struct rdma_sq_send_wqe_1st *swqe,
2838 				     struct rdma_sq_send_wqe_2st *swqe2,
2839 				     struct ib_send_wr *wr,
2840 				     struct ib_send_wr **bad_wr)
2841 {
2842 	memset(swqe2, 0, sizeof(*swqe2));
2843 	if (wr->send_flags & IB_SEND_INLINE) {
2844 		u8 flags = 0;
2845 
2846 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2847 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2848 						   bad_wr, &swqe->flags, flags);
2849 	}
2850 
2851 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2852 }
2853 
2854 static int qedr_prepare_reg(struct qedr_qp *qp,
2855 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2856 			    struct ib_reg_wr *wr)
2857 {
2858 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2859 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2860 
2861 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2862 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2863 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2864 	fwqe1->l_key = wr->key;
2865 
2866 	fwqe2->access_ctrl = 0;
2867 
2868 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2869 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2870 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2871 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2872 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2873 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2874 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2875 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2876 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2877 	fwqe2->fmr_ctrl = 0;
2878 
2879 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2880 		   ilog2(mr->ibmr.page_size) - 12);
2881 
2882 	fwqe2->length_hi = 0;
2883 	fwqe2->length_lo = mr->ibmr.length;
2884 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2885 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2886 
2887 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2888 
2889 	return 0;
2890 }
2891 
2892 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2893 {
2894 	switch (opcode) {
2895 	case IB_WR_RDMA_WRITE:
2896 	case IB_WR_RDMA_WRITE_WITH_IMM:
2897 		return IB_WC_RDMA_WRITE;
2898 	case IB_WR_SEND_WITH_IMM:
2899 	case IB_WR_SEND:
2900 	case IB_WR_SEND_WITH_INV:
2901 		return IB_WC_SEND;
2902 	case IB_WR_RDMA_READ:
2903 	case IB_WR_RDMA_READ_WITH_INV:
2904 		return IB_WC_RDMA_READ;
2905 	case IB_WR_ATOMIC_CMP_AND_SWP:
2906 		return IB_WC_COMP_SWAP;
2907 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2908 		return IB_WC_FETCH_ADD;
2909 	case IB_WR_REG_MR:
2910 		return IB_WC_REG_MR;
2911 	case IB_WR_LOCAL_INV:
2912 		return IB_WC_LOCAL_INV;
2913 	default:
2914 		return IB_WC_SEND;
2915 	}
2916 }
2917 
2918 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2919 {
2920 	int wq_is_full, err_wr, pbl_is_full;
2921 	struct qedr_dev *dev = qp->dev;
2922 
2923 	/* prevent SQ overflow and/or processing of a bad WR */
2924 	err_wr = wr->num_sge > qp->sq.max_sges;
2925 	wq_is_full = qedr_wq_is_full(&qp->sq);
2926 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2927 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2928 	if (wq_is_full || err_wr || pbl_is_full) {
2929 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2930 			DP_ERR(dev,
2931 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2932 			       qp);
2933 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2934 		}
2935 
2936 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2937 			DP_ERR(dev,
2938 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2939 			       qp);
2940 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2941 		}
2942 
2943 		if (pbl_is_full &&
2944 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2945 			DP_ERR(dev,
2946 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2947 			       qp);
2948 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2949 		}
2950 		return false;
2951 	}
2952 	return true;
2953 }
2954 
2955 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2956 		     struct ib_send_wr **bad_wr)
2957 {
2958 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2959 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2960 	struct rdma_sq_atomic_wqe_1st *awqe1;
2961 	struct rdma_sq_atomic_wqe_2nd *awqe2;
2962 	struct rdma_sq_atomic_wqe_3rd *awqe3;
2963 	struct rdma_sq_send_wqe_2st *swqe2;
2964 	struct rdma_sq_local_inv_wqe *iwqe;
2965 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2966 	struct rdma_sq_send_wqe_1st *swqe;
2967 	struct rdma_sq_rdma_wqe_1st *rwqe;
2968 	struct rdma_sq_fmr_wqe_1st *fwqe1;
2969 	struct rdma_sq_common_wqe *wqe;
2970 	u32 length;
2971 	int rc = 0;
2972 	bool comp;
2973 
2974 	if (!qedr_can_post_send(qp, wr)) {
2975 		*bad_wr = wr;
2976 		return -ENOMEM;
2977 	}
2978 
2979 	wqe = qed_chain_produce(&qp->sq.pbl);
2980 	qp->wqe_wr_id[qp->sq.prod].signaled =
2981 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2982 
2983 	wqe->flags = 0;
2984 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2985 		   !!(wr->send_flags & IB_SEND_SOLICITED));
2986 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2987 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2988 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2989 		   !!(wr->send_flags & IB_SEND_FENCE));
2990 	wqe->prev_wqe_size = qp->prev_wqe_size;
2991 
2992 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2993 
2994 	switch (wr->opcode) {
2995 	case IB_WR_SEND_WITH_IMM:
2996 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
2997 			rc = -EINVAL;
2998 			*bad_wr = wr;
2999 			break;
3000 		}
3001 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3002 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3003 		swqe->wqe_size = 2;
3004 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3005 
3006 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3007 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3008 						   wr, bad_wr);
3009 		swqe->length = cpu_to_le32(length);
3010 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3011 		qp->prev_wqe_size = swqe->wqe_size;
3012 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3013 		break;
3014 	case IB_WR_SEND:
3015 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3016 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3017 
3018 		swqe->wqe_size = 2;
3019 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3020 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3021 						   wr, bad_wr);
3022 		swqe->length = cpu_to_le32(length);
3023 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3024 		qp->prev_wqe_size = swqe->wqe_size;
3025 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3026 		break;
3027 	case IB_WR_SEND_WITH_INV:
3028 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3029 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3030 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3031 		swqe->wqe_size = 2;
3032 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3033 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3034 						   wr, bad_wr);
3035 		swqe->length = cpu_to_le32(length);
3036 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3037 		qp->prev_wqe_size = swqe->wqe_size;
3038 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3039 		break;
3040 
3041 	case IB_WR_RDMA_WRITE_WITH_IMM:
3042 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3043 			rc = -EINVAL;
3044 			*bad_wr = wr;
3045 			break;
3046 		}
3047 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3048 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3049 
3050 		rwqe->wqe_size = 2;
3051 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3052 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3053 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3054 						   wr, bad_wr);
3055 		rwqe->length = cpu_to_le32(length);
3056 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3057 		qp->prev_wqe_size = rwqe->wqe_size;
3058 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3059 		break;
3060 	case IB_WR_RDMA_WRITE:
3061 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3062 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3063 
3064 		rwqe->wqe_size = 2;
3065 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3066 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3067 						   wr, bad_wr);
3068 		rwqe->length = cpu_to_le32(length);
3069 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3070 		qp->prev_wqe_size = rwqe->wqe_size;
3071 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3072 		break;
3073 	case IB_WR_RDMA_READ_WITH_INV:
3074 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3075 		/* fallthrough -- same is identical to RDMA READ */
3076 
3077 	case IB_WR_RDMA_READ:
3078 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3079 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3080 
3081 		rwqe->wqe_size = 2;
3082 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3083 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3084 						   wr, bad_wr);
3085 		rwqe->length = cpu_to_le32(length);
3086 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3087 		qp->prev_wqe_size = rwqe->wqe_size;
3088 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3089 		break;
3090 
3091 	case IB_WR_ATOMIC_CMP_AND_SWP:
3092 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3093 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3094 		awqe1->wqe_size = 4;
3095 
3096 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3097 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3098 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3099 
3100 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3101 
3102 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3103 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3104 			DMA_REGPAIR_LE(awqe3->swap_data,
3105 				       atomic_wr(wr)->compare_add);
3106 		} else {
3107 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3108 			DMA_REGPAIR_LE(awqe3->swap_data,
3109 				       atomic_wr(wr)->swap);
3110 			DMA_REGPAIR_LE(awqe3->cmp_data,
3111 				       atomic_wr(wr)->compare_add);
3112 		}
3113 
3114 		qedr_prepare_sq_sges(qp, NULL, wr);
3115 
3116 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3117 		qp->prev_wqe_size = awqe1->wqe_size;
3118 		break;
3119 
3120 	case IB_WR_LOCAL_INV:
3121 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3122 		iwqe->wqe_size = 1;
3123 
3124 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3125 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3126 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3127 		qp->prev_wqe_size = iwqe->wqe_size;
3128 		break;
3129 	case IB_WR_REG_MR:
3130 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3131 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3132 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3133 		fwqe1->wqe_size = 2;
3134 
3135 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3136 		if (rc) {
3137 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3138 			*bad_wr = wr;
3139 			break;
3140 		}
3141 
3142 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3143 		qp->prev_wqe_size = fwqe1->wqe_size;
3144 		break;
3145 	default:
3146 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3147 		rc = -EINVAL;
3148 		*bad_wr = wr;
3149 		break;
3150 	}
3151 
3152 	if (*bad_wr) {
3153 		u16 value;
3154 
3155 		/* Restore prod to its position before
3156 		 * this WR was processed
3157 		 */
3158 		value = le16_to_cpu(qp->sq.db_data.data.value);
3159 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3160 
3161 		/* Restore prev_wqe_size */
3162 		qp->prev_wqe_size = wqe->prev_wqe_size;
3163 		rc = -EINVAL;
3164 		DP_ERR(dev, "POST SEND FAILED\n");
3165 	}
3166 
3167 	return rc;
3168 }
3169 
3170 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3171 		   struct ib_send_wr **bad_wr)
3172 {
3173 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3174 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3175 	unsigned long flags;
3176 	int rc = 0;
3177 
3178 	*bad_wr = NULL;
3179 
3180 	if (qp->qp_type == IB_QPT_GSI)
3181 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3182 
3183 	spin_lock_irqsave(&qp->q_lock, flags);
3184 
3185 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3186 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3187 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3188 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3189 			spin_unlock_irqrestore(&qp->q_lock, flags);
3190 			*bad_wr = wr;
3191 			DP_DEBUG(dev, QEDR_MSG_CQ,
3192 				 "QP in wrong state! QP icid=0x%x state %d\n",
3193 				 qp->icid, qp->state);
3194 			return -EINVAL;
3195 		}
3196 	}
3197 
3198 	while (wr) {
3199 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3200 		if (rc)
3201 			break;
3202 
3203 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3204 
3205 		qedr_inc_sw_prod(&qp->sq);
3206 
3207 		qp->sq.db_data.data.value++;
3208 
3209 		wr = wr->next;
3210 	}
3211 
3212 	/* Trigger doorbell
3213 	 * If there was a failure in the first WR then it will be triggered in
3214 	 * vane. However this is not harmful (as long as the producer value is
3215 	 * unchanged). For performance reasons we avoid checking for this
3216 	 * redundant doorbell.
3217 	 *
3218 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3219 	 * soon as we give the doorbell, we could get a completion
3220 	 * for this wr, therefore we need to make sure that the
3221 	 * memory is updated before giving the doorbell.
3222 	 * During qedr_poll_cq, rmb is called before accessing the
3223 	 * cqe. This covers for the smp_rmb as well.
3224 	 */
3225 	smp_wmb();
3226 	writel(qp->sq.db_data.raw, qp->sq.db);
3227 
3228 	/* Make sure write sticks */
3229 	mmiowb();
3230 
3231 	spin_unlock_irqrestore(&qp->q_lock, flags);
3232 
3233 	return rc;
3234 }
3235 
3236 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3237 		   struct ib_recv_wr **bad_wr)
3238 {
3239 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3240 	struct qedr_dev *dev = qp->dev;
3241 	unsigned long flags;
3242 	int status = 0;
3243 
3244 	if (qp->qp_type == IB_QPT_GSI)
3245 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3246 
3247 	spin_lock_irqsave(&qp->q_lock, flags);
3248 
3249 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3250 		spin_unlock_irqrestore(&qp->q_lock, flags);
3251 		*bad_wr = wr;
3252 		return -EINVAL;
3253 	}
3254 
3255 	while (wr) {
3256 		int i;
3257 
3258 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3259 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3260 		    wr->num_sge > qp->rq.max_sges) {
3261 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3262 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3263 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3264 			       qp->rq.max_sges);
3265 			status = -ENOMEM;
3266 			*bad_wr = wr;
3267 			break;
3268 		}
3269 		for (i = 0; i < wr->num_sge; i++) {
3270 			u32 flags = 0;
3271 			struct rdma_rq_sge *rqe =
3272 			    qed_chain_produce(&qp->rq.pbl);
3273 
3274 			/* First one must include the number
3275 			 * of SGE in the list
3276 			 */
3277 			if (!i)
3278 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3279 					  wr->num_sge);
3280 
3281 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3282 				  wr->sg_list[i].lkey);
3283 
3284 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3285 				   wr->sg_list[i].length, flags);
3286 		}
3287 
3288 		/* Special case of no sges. FW requires between 1-4 sges...
3289 		 * in this case we need to post 1 sge with length zero. this is
3290 		 * because rdma write with immediate consumes an RQ.
3291 		 */
3292 		if (!wr->num_sge) {
3293 			u32 flags = 0;
3294 			struct rdma_rq_sge *rqe =
3295 			    qed_chain_produce(&qp->rq.pbl);
3296 
3297 			/* First one must include the number
3298 			 * of SGE in the list
3299 			 */
3300 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3301 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3302 
3303 			RQ_SGE_SET(rqe, 0, 0, flags);
3304 			i = 1;
3305 		}
3306 
3307 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3308 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3309 
3310 		qedr_inc_sw_prod(&qp->rq);
3311 
3312 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3313 		 * soon as we give the doorbell, we could get a completion
3314 		 * for this wr, therefore we need to make sure that the
3315 		 * memory is update before giving the doorbell.
3316 		 * During qedr_poll_cq, rmb is called before accessing the
3317 		 * cqe. This covers for the smp_rmb as well.
3318 		 */
3319 		smp_wmb();
3320 
3321 		qp->rq.db_data.data.value++;
3322 
3323 		writel(qp->rq.db_data.raw, qp->rq.db);
3324 
3325 		/* Make sure write sticks */
3326 		mmiowb();
3327 
3328 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3329 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3330 			mmiowb();	/* for second doorbell */
3331 		}
3332 
3333 		wr = wr->next;
3334 	}
3335 
3336 	spin_unlock_irqrestore(&qp->q_lock, flags);
3337 
3338 	return status;
3339 }
3340 
3341 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3342 {
3343 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3344 
3345 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3346 		cq->pbl_toggle;
3347 }
3348 
3349 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3350 {
3351 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3352 	struct qedr_qp *qp;
3353 
3354 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3355 						   resp_cqe->qp_handle.lo,
3356 						   u64);
3357 	return qp;
3358 }
3359 
3360 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3361 {
3362 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3363 
3364 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3365 }
3366 
3367 /* Return latest CQE (needs processing) */
3368 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3369 {
3370 	return cq->latest_cqe;
3371 }
3372 
3373 /* In fmr we need to increase the number of fmr completed counter for the fmr
3374  * algorithm determining whether we can free a pbl or not.
3375  * we need to perform this whether the work request was signaled or not. for
3376  * this purpose we call this function from the condition that checks if a wr
3377  * should be skipped, to make sure we don't miss it ( possibly this fmr
3378  * operation was not signalted)
3379  */
3380 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3381 {
3382 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3383 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3384 }
3385 
3386 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3387 		       struct qedr_cq *cq, int num_entries,
3388 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3389 		       int force)
3390 {
3391 	u16 cnt = 0;
3392 
3393 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3394 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3395 			qedr_chk_if_fmr(qp);
3396 			/* skip WC */
3397 			goto next_cqe;
3398 		}
3399 
3400 		/* fill WC */
3401 		wc->status = status;
3402 		wc->vendor_err = 0;
3403 		wc->wc_flags = 0;
3404 		wc->src_qp = qp->id;
3405 		wc->qp = &qp->ibqp;
3406 
3407 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3408 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3409 
3410 		switch (wc->opcode) {
3411 		case IB_WC_RDMA_WRITE:
3412 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3413 			break;
3414 		case IB_WC_COMP_SWAP:
3415 		case IB_WC_FETCH_ADD:
3416 			wc->byte_len = 8;
3417 			break;
3418 		case IB_WC_REG_MR:
3419 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3420 			break;
3421 		case IB_WC_RDMA_READ:
3422 		case IB_WC_SEND:
3423 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3424 			break;
3425 		default:
3426 			break;
3427 		}
3428 
3429 		num_entries--;
3430 		wc++;
3431 		cnt++;
3432 next_cqe:
3433 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3434 			qed_chain_consume(&qp->sq.pbl);
3435 		qedr_inc_sw_cons(&qp->sq);
3436 	}
3437 
3438 	return cnt;
3439 }
3440 
3441 static int qedr_poll_cq_req(struct qedr_dev *dev,
3442 			    struct qedr_qp *qp, struct qedr_cq *cq,
3443 			    int num_entries, struct ib_wc *wc,
3444 			    struct rdma_cqe_requester *req)
3445 {
3446 	int cnt = 0;
3447 
3448 	switch (req->status) {
3449 	case RDMA_CQE_REQ_STS_OK:
3450 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3451 				  IB_WC_SUCCESS, 0);
3452 		break;
3453 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3454 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3455 			DP_DEBUG(dev, QEDR_MSG_CQ,
3456 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3457 				 cq->icid, qp->icid);
3458 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3459 				  IB_WC_WR_FLUSH_ERR, 1);
3460 		break;
3461 	default:
3462 		/* process all WQE before the cosumer */
3463 		qp->state = QED_ROCE_QP_STATE_ERR;
3464 		cnt = process_req(dev, qp, cq, num_entries, wc,
3465 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3466 		wc += cnt;
3467 		/* if we have extra WC fill it with actual error info */
3468 		if (cnt < num_entries) {
3469 			enum ib_wc_status wc_status;
3470 
3471 			switch (req->status) {
3472 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3473 				DP_ERR(dev,
3474 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3475 				       cq->icid, qp->icid);
3476 				wc_status = IB_WC_BAD_RESP_ERR;
3477 				break;
3478 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3479 				DP_ERR(dev,
3480 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3481 				       cq->icid, qp->icid);
3482 				wc_status = IB_WC_LOC_LEN_ERR;
3483 				break;
3484 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3485 				DP_ERR(dev,
3486 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3487 				       cq->icid, qp->icid);
3488 				wc_status = IB_WC_LOC_QP_OP_ERR;
3489 				break;
3490 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3491 				DP_ERR(dev,
3492 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3493 				       cq->icid, qp->icid);
3494 				wc_status = IB_WC_LOC_PROT_ERR;
3495 				break;
3496 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3497 				DP_ERR(dev,
3498 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3499 				       cq->icid, qp->icid);
3500 				wc_status = IB_WC_MW_BIND_ERR;
3501 				break;
3502 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3503 				DP_ERR(dev,
3504 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3505 				       cq->icid, qp->icid);
3506 				wc_status = IB_WC_REM_INV_REQ_ERR;
3507 				break;
3508 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3509 				DP_ERR(dev,
3510 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3511 				       cq->icid, qp->icid);
3512 				wc_status = IB_WC_REM_ACCESS_ERR;
3513 				break;
3514 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3515 				DP_ERR(dev,
3516 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3517 				       cq->icid, qp->icid);
3518 				wc_status = IB_WC_REM_OP_ERR;
3519 				break;
3520 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3521 				DP_ERR(dev,
3522 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3523 				       cq->icid, qp->icid);
3524 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3525 				break;
3526 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3527 				DP_ERR(dev,
3528 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3529 				       cq->icid, qp->icid);
3530 				wc_status = IB_WC_RETRY_EXC_ERR;
3531 				break;
3532 			default:
3533 				DP_ERR(dev,
3534 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3535 				       cq->icid, qp->icid);
3536 				wc_status = IB_WC_GENERAL_ERR;
3537 			}
3538 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3539 					   wc_status, 1);
3540 		}
3541 	}
3542 
3543 	return cnt;
3544 }
3545 
3546 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3547 {
3548 	switch (status) {
3549 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3550 		return IB_WC_LOC_ACCESS_ERR;
3551 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3552 		return IB_WC_LOC_LEN_ERR;
3553 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3554 		return IB_WC_LOC_QP_OP_ERR;
3555 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3556 		return IB_WC_LOC_PROT_ERR;
3557 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3558 		return IB_WC_MW_BIND_ERR;
3559 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3560 		return IB_WC_REM_INV_RD_REQ_ERR;
3561 	case RDMA_CQE_RESP_STS_OK:
3562 		return IB_WC_SUCCESS;
3563 	default:
3564 		return IB_WC_GENERAL_ERR;
3565 	}
3566 }
3567 
3568 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3569 					  struct ib_wc *wc)
3570 {
3571 	wc->status = IB_WC_SUCCESS;
3572 	wc->byte_len = le32_to_cpu(resp->length);
3573 
3574 	if (resp->flags & QEDR_RESP_IMM) {
3575 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3576 		wc->wc_flags |= IB_WC_WITH_IMM;
3577 
3578 		if (resp->flags & QEDR_RESP_RDMA)
3579 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3580 
3581 		if (resp->flags & QEDR_RESP_INV)
3582 			return -EINVAL;
3583 
3584 	} else if (resp->flags & QEDR_RESP_INV) {
3585 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3586 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3587 
3588 		if (resp->flags & QEDR_RESP_RDMA)
3589 			return -EINVAL;
3590 
3591 	} else if (resp->flags & QEDR_RESP_RDMA) {
3592 		return -EINVAL;
3593 	}
3594 
3595 	return 0;
3596 }
3597 
3598 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3599 			       struct qedr_cq *cq, struct ib_wc *wc,
3600 			       struct rdma_cqe_responder *resp, u64 wr_id)
3601 {
3602 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3603 	wc->opcode = IB_WC_RECV;
3604 	wc->wc_flags = 0;
3605 
3606 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3607 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3608 			DP_ERR(dev,
3609 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3610 			       cq, cq->icid, resp->flags);
3611 
3612 	} else {
3613 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3614 		if (wc->status == IB_WC_GENERAL_ERR)
3615 			DP_ERR(dev,
3616 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3617 			       cq, cq->icid, resp->status);
3618 	}
3619 
3620 	/* Fill the rest of the WC */
3621 	wc->vendor_err = 0;
3622 	wc->src_qp = qp->id;
3623 	wc->qp = &qp->ibqp;
3624 	wc->wr_id = wr_id;
3625 }
3626 
3627 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3628 			    struct qedr_cq *cq, struct ib_wc *wc,
3629 			    struct rdma_cqe_responder *resp)
3630 {
3631 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3632 
3633 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3634 
3635 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3636 		qed_chain_consume(&qp->rq.pbl);
3637 	qedr_inc_sw_cons(&qp->rq);
3638 
3639 	return 1;
3640 }
3641 
3642 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3643 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3644 {
3645 	u16 cnt = 0;
3646 
3647 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3648 		/* fill WC */
3649 		wc->status = IB_WC_WR_FLUSH_ERR;
3650 		wc->vendor_err = 0;
3651 		wc->wc_flags = 0;
3652 		wc->src_qp = qp->id;
3653 		wc->byte_len = 0;
3654 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3655 		wc->qp = &qp->ibqp;
3656 		num_entries--;
3657 		wc++;
3658 		cnt++;
3659 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3660 			qed_chain_consume(&qp->rq.pbl);
3661 		qedr_inc_sw_cons(&qp->rq);
3662 	}
3663 
3664 	return cnt;
3665 }
3666 
3667 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3668 				 struct rdma_cqe_responder *resp, int *update)
3669 {
3670 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
3671 		consume_cqe(cq);
3672 		*update |= 1;
3673 	}
3674 }
3675 
3676 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3677 			     struct qedr_cq *cq, int num_entries,
3678 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
3679 			     int *update)
3680 {
3681 	int cnt;
3682 
3683 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3684 		cnt = process_resp_flush(qp, cq, num_entries, wc,
3685 					 resp->rq_cons_or_srq_id);
3686 		try_consume_resp_cqe(cq, qp, resp, update);
3687 	} else {
3688 		cnt = process_resp_one(dev, qp, cq, wc, resp);
3689 		consume_cqe(cq);
3690 		*update |= 1;
3691 	}
3692 
3693 	return cnt;
3694 }
3695 
3696 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3697 				struct rdma_cqe_requester *req, int *update)
3698 {
3699 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3700 		consume_cqe(cq);
3701 		*update |= 1;
3702 	}
3703 }
3704 
3705 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3706 {
3707 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3708 	struct qedr_cq *cq = get_qedr_cq(ibcq);
3709 	union rdma_cqe *cqe;
3710 	u32 old_cons, new_cons;
3711 	unsigned long flags;
3712 	int update = 0;
3713 	int done = 0;
3714 
3715 	if (cq->destroyed) {
3716 		DP_ERR(dev,
3717 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3718 		       cq, cq->icid);
3719 		return 0;
3720 	}
3721 
3722 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3723 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3724 
3725 	spin_lock_irqsave(&cq->cq_lock, flags);
3726 	cqe = cq->latest_cqe;
3727 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3728 	while (num_entries && is_valid_cqe(cq, cqe)) {
3729 		struct qedr_qp *qp;
3730 		int cnt = 0;
3731 
3732 		/* prevent speculative reads of any field of CQE */
3733 		rmb();
3734 
3735 		qp = cqe_get_qp(cqe);
3736 		if (!qp) {
3737 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3738 			break;
3739 		}
3740 
3741 		wc->qp = &qp->ibqp;
3742 
3743 		switch (cqe_get_type(cqe)) {
3744 		case RDMA_CQE_TYPE_REQUESTER:
3745 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3746 					       &cqe->req);
3747 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
3748 			break;
3749 		case RDMA_CQE_TYPE_RESPONDER_RQ:
3750 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3751 						&cqe->resp, &update);
3752 			break;
3753 		case RDMA_CQE_TYPE_INVALID:
3754 		default:
3755 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
3756 			       cqe_get_type(cqe));
3757 		}
3758 		num_entries -= cnt;
3759 		wc += cnt;
3760 		done += cnt;
3761 
3762 		cqe = get_cqe(cq);
3763 	}
3764 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3765 
3766 	cq->cq_cons += new_cons - old_cons;
3767 
3768 	if (update)
3769 		/* doorbell notifies abount latest VALID entry,
3770 		 * but chain already point to the next INVALID one
3771 		 */
3772 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3773 
3774 	spin_unlock_irqrestore(&cq->cq_lock, flags);
3775 	return done;
3776 }
3777 
3778 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3779 		     u8 port_num,
3780 		     const struct ib_wc *in_wc,
3781 		     const struct ib_grh *in_grh,
3782 		     const struct ib_mad_hdr *mad_hdr,
3783 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
3784 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
3785 {
3786 	struct qedr_dev *dev = get_qedr_dev(ibdev);
3787 
3788 	DP_DEBUG(dev, QEDR_MSG_GSI,
3789 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3790 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3791 		 mad_hdr->class_specific, mad_hdr->class_version,
3792 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3793 	return IB_MAD_RESULT_SUCCESS;
3794 }
3795