xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision f79e4d5f92a129a1159c973735007d4ddc8541f3)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_roce_cm.h"
53 
/* Shift a doorbell offset left by DB_PWM_ADDR_OFFSET_SHIFT. */
#define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 
56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
57 					size_t len)
58 {
59 	size_t min_len = min_t(size_t, len, udata->outlen);
60 
61 	return ib_copy_to_udata(udata, src, min_len);
62 }
63 
64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
65 {
66 	if (index > QEDR_ROCE_PKEY_TABLE_LEN)
67 		return -EINVAL;
68 
69 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
70 	return 0;
71 }
72 
73 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
74 		      int index, union ib_gid *sgid)
75 {
76 	struct qedr_dev *dev = get_qedr_dev(ibdev);
77 
78 	memset(sgid->raw, 0, sizeof(sgid->raw));
79 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
80 
81 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
82 		 sgid->global.interface_id, sgid->global.subnet_prefix);
83 
84 	return 0;
85 }
86 
87 int qedr_query_device(struct ib_device *ibdev,
88 		      struct ib_device_attr *attr, struct ib_udata *udata)
89 {
90 	struct qedr_dev *dev = get_qedr_dev(ibdev);
91 	struct qedr_device_attr *qattr = &dev->attr;
92 
93 	if (!dev->rdma_ctx) {
94 		DP_ERR(dev,
95 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
96 		       dev->rdma_ctx);
97 		return -EINVAL;
98 	}
99 
100 	memset(attr, 0, sizeof(*attr));
101 
102 	attr->fw_ver = qattr->fw_ver;
103 	attr->sys_image_guid = qattr->sys_image_guid;
104 	attr->max_mr_size = qattr->max_mr_size;
105 	attr->page_size_cap = qattr->page_size_caps;
106 	attr->vendor_id = qattr->vendor_id;
107 	attr->vendor_part_id = qattr->vendor_part_id;
108 	attr->hw_ver = qattr->hw_ver;
109 	attr->max_qp = qattr->max_qp;
110 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
111 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
112 	    IB_DEVICE_RC_RNR_NAK_GEN |
113 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
114 
115 	attr->max_sge = qattr->max_sge;
116 	attr->max_sge_rd = qattr->max_sge;
117 	attr->max_cq = qattr->max_cq;
118 	attr->max_cqe = qattr->max_cqe;
119 	attr->max_mr = qattr->max_mr;
120 	attr->max_mw = qattr->max_mw;
121 	attr->max_pd = qattr->max_pd;
122 	attr->atomic_cap = dev->atomic_cap;
123 	attr->max_fmr = qattr->max_fmr;
124 	attr->max_map_per_fmr = 16;
125 	attr->max_qp_init_rd_atom =
126 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
127 	attr->max_qp_rd_atom =
128 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
129 		attr->max_qp_init_rd_atom);
130 
131 	attr->max_srq = qattr->max_srq;
132 	attr->max_srq_sge = qattr->max_srq_sge;
133 	attr->max_srq_wr = qattr->max_srq_wr;
134 
135 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
136 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
137 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
138 	attr->max_ah = qattr->max_ah;
139 
140 	return 0;
141 }
142 
/* Speed codes that get_link_speed_and_width() stores through its ib_speed
 * output (qedr_query_port() passes &attr->active_speed there).
 */
#define QEDR_SPEED_SDR		(1)
#define QEDR_SPEED_DDR		(2)
#define QEDR_SPEED_QDR		(4)
#define QEDR_SPEED_FDR10	(8)
#define QEDR_SPEED_FDR		(16)
#define QEDR_SPEED_EDR		(32)
149 
150 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
151 					    u8 *ib_width)
152 {
153 	switch (speed) {
154 	case 1000:
155 		*ib_speed = QEDR_SPEED_SDR;
156 		*ib_width = IB_WIDTH_1X;
157 		break;
158 	case 10000:
159 		*ib_speed = QEDR_SPEED_QDR;
160 		*ib_width = IB_WIDTH_1X;
161 		break;
162 
163 	case 20000:
164 		*ib_speed = QEDR_SPEED_DDR;
165 		*ib_width = IB_WIDTH_4X;
166 		break;
167 
168 	case 25000:
169 		*ib_speed = QEDR_SPEED_EDR;
170 		*ib_width = IB_WIDTH_1X;
171 		break;
172 
173 	case 40000:
174 		*ib_speed = QEDR_SPEED_QDR;
175 		*ib_width = IB_WIDTH_4X;
176 		break;
177 
178 	case 50000:
179 		*ib_speed = QEDR_SPEED_QDR;
180 		*ib_width = IB_WIDTH_4X;
181 		break;
182 
183 	case 100000:
184 		*ib_speed = QEDR_SPEED_EDR;
185 		*ib_width = IB_WIDTH_4X;
186 		break;
187 
188 	default:
189 		/* Unsupported */
190 		*ib_speed = QEDR_SPEED_SDR;
191 		*ib_width = IB_WIDTH_1X;
192 	}
193 }
194 
195 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
196 {
197 	struct qedr_dev *dev;
198 	struct qed_rdma_port *rdma_port;
199 
200 	dev = get_qedr_dev(ibdev);
201 	if (port > 1) {
202 		DP_ERR(dev, "invalid_port=0x%x\n", port);
203 		return -EINVAL;
204 	}
205 
206 	if (!dev->rdma_ctx) {
207 		DP_ERR(dev, "rdma_ctx is NULL\n");
208 		return -EINVAL;
209 	}
210 
211 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
212 
213 	/* *attr being zeroed by the caller, avoid zeroing it here */
214 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
215 		attr->state = IB_PORT_ACTIVE;
216 		attr->phys_state = 5;
217 	} else {
218 		attr->state = IB_PORT_DOWN;
219 		attr->phys_state = 3;
220 	}
221 	attr->max_mtu = IB_MTU_4096;
222 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
223 	attr->lid = 0;
224 	attr->lmc = 0;
225 	attr->sm_lid = 0;
226 	attr->sm_sl = 0;
227 	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
228 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
229 		attr->gid_tbl_len = 1;
230 		attr->pkey_tbl_len = 1;
231 	} else {
232 		attr->gid_tbl_len = QEDR_MAX_SGID;
233 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
234 	}
235 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
236 	attr->qkey_viol_cntr = 0;
237 	get_link_speed_and_width(rdma_port->link_speed,
238 				 &attr->active_speed, &attr->active_width);
239 	attr->max_msg_sz = rdma_port->max_msg_size;
240 	attr->max_vl_num = 4;
241 
242 	return 0;
243 }
244 
245 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
246 		     struct ib_port_modify *props)
247 {
248 	struct qedr_dev *dev;
249 
250 	dev = get_qedr_dev(ibdev);
251 	if (port > 1) {
252 		DP_ERR(dev, "invalid_port=0x%x\n", port);
253 		return -EINVAL;
254 	}
255 
256 	return 0;
257 }
258 
259 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
260 			 unsigned long len)
261 {
262 	struct qedr_mm *mm;
263 
264 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
265 	if (!mm)
266 		return -ENOMEM;
267 
268 	mm->key.phy_addr = phy_addr;
269 	/* This function might be called with a length which is not a multiple
270 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
271 	 * forces this granularity by increasing the requested size if needed.
272 	 * When qedr_mmap is called, it will search the list with the updated
273 	 * length as a key. To prevent search failures, the length is rounded up
274 	 * in advance to PAGE_SIZE.
275 	 */
276 	mm->key.len = roundup(len, PAGE_SIZE);
277 	INIT_LIST_HEAD(&mm->entry);
278 
279 	mutex_lock(&uctx->mm_list_lock);
280 	list_add(&mm->entry, &uctx->mm_head);
281 	mutex_unlock(&uctx->mm_list_lock);
282 
283 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
284 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
285 		 (unsigned long long)mm->key.phy_addr,
286 		 (unsigned long)mm->key.len, uctx);
287 
288 	return 0;
289 }
290 
291 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
292 			     unsigned long len)
293 {
294 	bool found = false;
295 	struct qedr_mm *mm;
296 
297 	mutex_lock(&uctx->mm_list_lock);
298 	list_for_each_entry(mm, &uctx->mm_head, entry) {
299 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
300 			continue;
301 
302 		found = true;
303 		break;
304 	}
305 	mutex_unlock(&uctx->mm_list_lock);
306 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
307 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
308 		 mm->key.phy_addr, mm->key.len, uctx, found);
309 
310 	return found;
311 }
312 
313 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
314 					struct ib_udata *udata)
315 {
316 	int rc;
317 	struct qedr_ucontext *ctx;
318 	struct qedr_alloc_ucontext_resp uresp;
319 	struct qedr_dev *dev = get_qedr_dev(ibdev);
320 	struct qed_rdma_add_user_out_params oparams;
321 
322 	if (!udata)
323 		return ERR_PTR(-EFAULT);
324 
325 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
326 	if (!ctx)
327 		return ERR_PTR(-ENOMEM);
328 
329 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
330 	if (rc) {
331 		DP_ERR(dev,
332 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
333 		       rc);
334 		goto err;
335 	}
336 
337 	ctx->dpi = oparams.dpi;
338 	ctx->dpi_addr = oparams.dpi_addr;
339 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
340 	ctx->dpi_size = oparams.dpi_size;
341 	INIT_LIST_HEAD(&ctx->mm_head);
342 	mutex_init(&ctx->mm_list_lock);
343 
344 	memset(&uresp, 0, sizeof(uresp));
345 
346 	uresp.dpm_enabled = dev->user_dpm_enabled;
347 	uresp.wids_enabled = 1;
348 	uresp.wid_count = oparams.wid_count;
349 	uresp.db_pa = ctx->dpi_phys_addr;
350 	uresp.db_size = ctx->dpi_size;
351 	uresp.max_send_wr = dev->attr.max_sqe;
352 	uresp.max_recv_wr = dev->attr.max_rqe;
353 	uresp.max_srq_wr = dev->attr.max_srq_wr;
354 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
355 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
356 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
357 	uresp.max_cqes = QEDR_MAX_CQES;
358 
359 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
360 	if (rc)
361 		goto err;
362 
363 	ctx->dev = dev;
364 
365 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
366 	if (rc)
367 		goto err;
368 
369 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
370 		 &ctx->ibucontext);
371 	return &ctx->ibucontext;
372 
373 err:
374 	kfree(ctx);
375 	return ERR_PTR(rc);
376 }
377 
378 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
379 {
380 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
381 	struct qedr_mm *mm, *tmp;
382 	int status = 0;
383 
384 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
385 		 uctx);
386 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
387 
388 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
389 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
390 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
391 			 mm->key.phy_addr, mm->key.len, uctx);
392 		list_del(&mm->entry);
393 		kfree(mm);
394 	}
395 
396 	kfree(uctx);
397 	return status;
398 }
399 
400 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
401 {
402 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
403 	struct qedr_dev *dev = get_qedr_dev(context->device);
404 	unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
405 	unsigned long len = (vma->vm_end - vma->vm_start);
406 	unsigned long dpi_start;
407 
408 	dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
409 
410 	DP_DEBUG(dev, QEDR_MSG_INIT,
411 		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
412 		 (void *)vma->vm_start, (void *)vma->vm_end,
413 		 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
414 
415 	if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
416 		DP_ERR(dev,
417 		       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
418 		       (void *)vma->vm_start, (void *)vma->vm_end);
419 		return -EINVAL;
420 	}
421 
422 	if (!qedr_search_mmap(ucontext, phys_addr, len)) {
423 		DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
424 		       vma->vm_pgoff);
425 		return -EINVAL;
426 	}
427 
428 	if (phys_addr < dpi_start ||
429 	    ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
430 		DP_ERR(dev,
431 		       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
432 		       (void *)phys_addr, (void *)dpi_start,
433 		       ucontext->dpi_size);
434 		return -EINVAL;
435 	}
436 
437 	if (vma->vm_flags & VM_READ) {
438 		DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
439 		return -EINVAL;
440 	}
441 
442 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
443 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
444 				  vma->vm_page_prot);
445 }
446 
447 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
448 			    struct ib_ucontext *context, struct ib_udata *udata)
449 {
450 	struct qedr_dev *dev = get_qedr_dev(ibdev);
451 	struct qedr_pd *pd;
452 	u16 pd_id;
453 	int rc;
454 
455 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
456 		 (udata && context) ? "User Lib" : "Kernel");
457 
458 	if (!dev->rdma_ctx) {
459 		DP_ERR(dev, "invalid RDMA context\n");
460 		return ERR_PTR(-EINVAL);
461 	}
462 
463 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
464 	if (!pd)
465 		return ERR_PTR(-ENOMEM);
466 
467 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
468 	if (rc)
469 		goto err;
470 
471 	pd->pd_id = pd_id;
472 
473 	if (udata && context) {
474 		struct qedr_alloc_pd_uresp uresp = {
475 			.pd_id = pd_id,
476 		};
477 
478 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
479 		if (rc) {
480 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
481 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
482 			goto err;
483 		}
484 
485 		pd->uctx = get_qedr_ucontext(context);
486 		pd->uctx->pd = pd;
487 	}
488 
489 	return &pd->ibpd;
490 
491 err:
492 	kfree(pd);
493 	return ERR_PTR(rc);
494 }
495 
496 int qedr_dealloc_pd(struct ib_pd *ibpd)
497 {
498 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
499 	struct qedr_pd *pd = get_qedr_pd(ibpd);
500 
501 	if (!pd) {
502 		pr_err("Invalid PD received in dealloc_pd\n");
503 		return -EINVAL;
504 	}
505 
506 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
507 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
508 
509 	kfree(pd);
510 
511 	return 0;
512 }
513 
514 static void qedr_free_pbl(struct qedr_dev *dev,
515 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
516 {
517 	struct pci_dev *pdev = dev->pdev;
518 	int i;
519 
520 	for (i = 0; i < pbl_info->num_pbls; i++) {
521 		if (!pbl[i].va)
522 			continue;
523 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
524 				  pbl[i].va, pbl[i].pa);
525 	}
526 
527 	kfree(pbl);
528 }
529 
/* Smallest and largest PBL page sizes, in bytes, used by this file. */
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

/* Number of 64-bit page-buffer entries (PBEs) that fit in _page_size bytes.
 * The argument is parenthesized so that an expression argument is divided
 * as a whole.
 */
#define NUM_PBES_ON_PAGE(_page_size) ((_page_size) / sizeof(u64))
/* PBE capacity of one maximum-size PBL page ... */
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
/* ... and of a two-layer PBL built from maximum-size pages. */
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
536 
537 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
538 					   struct qedr_pbl_info *pbl_info,
539 					   gfp_t flags)
540 {
541 	struct pci_dev *pdev = dev->pdev;
542 	struct qedr_pbl *pbl_table;
543 	dma_addr_t *pbl_main_tbl;
544 	dma_addr_t pa;
545 	void *va;
546 	int i;
547 
548 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
549 	if (!pbl_table)
550 		return ERR_PTR(-ENOMEM);
551 
552 	for (i = 0; i < pbl_info->num_pbls; i++) {
553 		va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
554 					 &pa, flags);
555 		if (!va)
556 			goto err;
557 
558 		pbl_table[i].va = va;
559 		pbl_table[i].pa = pa;
560 	}
561 
562 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
563 	 * the first one with physical pointers to all of the rest
564 	 */
565 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
566 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
567 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
568 
569 	return pbl_table;
570 
571 err:
572 	for (i--; i >= 0; i--)
573 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
574 				  pbl_table[i].va, pbl_table[i].pa);
575 
576 	qedr_free_pbl(dev, pbl_info, pbl_table);
577 
578 	return ERR_PTR(-ENOMEM);
579 }
580 
581 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
582 				struct qedr_pbl_info *pbl_info,
583 				u32 num_pbes, int two_layer_capable)
584 {
585 	u32 pbl_capacity;
586 	u32 pbl_size;
587 	u32 num_pbls;
588 
589 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
590 		if (num_pbes > MAX_PBES_TWO_LAYER) {
591 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
592 			       num_pbes);
593 			return -EINVAL;
594 		}
595 
596 		/* calculate required pbl page size */
597 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
598 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
599 			       NUM_PBES_ON_PAGE(pbl_size);
600 
601 		while (pbl_capacity < num_pbes) {
602 			pbl_size *= 2;
603 			pbl_capacity = pbl_size / sizeof(u64);
604 			pbl_capacity = pbl_capacity * pbl_capacity;
605 		}
606 
607 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
608 		num_pbls++;	/* One for the layer0 ( points to the pbls) */
609 		pbl_info->two_layered = true;
610 	} else {
611 		/* One layered PBL */
612 		num_pbls = 1;
613 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
614 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
615 		pbl_info->two_layered = false;
616 	}
617 
618 	pbl_info->num_pbls = num_pbls;
619 	pbl_info->pbl_size = pbl_size;
620 	pbl_info->num_pbes = num_pbes;
621 
622 	DP_DEBUG(dev, QEDR_MSG_MR,
623 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
624 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
625 
626 	return 0;
627 }
628 
/* Write the DMA address of every firmware-sized page of @umem into the
 * PBL pages of @pbl.
 *
 * @pbl_info describes the table layout (number of entries, page size,
 * one or two layers). @pg_shift is log2 of the page size used for the
 * entries; each umem page of size BIT(umem->page_shift) is split into
 * BIT(umem->page_shift - pg_shift) consecutive entries.
 *
 * Writing stops as soon as pbl_info->num_pbes entries have been stored.
 * Nothing is done when num_pbes is 0 or the first data PBL has no buffer.
 */
static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
			       struct qedr_pbl *pbl,
			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
	u32 fw_pg_cnt, fw_pg_per_umem_pg;
	struct qedr_pbl *pbl_tbl;
	struct scatterlist *sg;
	struct regpair *pbe;
	u64 pg_addr;
	int entry;

	if (!pbl_info->num_pbes)
		return;

	/* If we have a two layered pbl, the first pbl points to the rest
	 * of the pbls and the first entry lays on the second pbl in the table
	 */
	if (pbl_info->two_layered)
		pbl_tbl = &pbl[1];
	else
		pbl_tbl = pbl;

	pbe = (struct regpair *)pbl_tbl->va;
	if (!pbe) {
		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
		return;
	}

	/* Entries written so far into the current PBL page */
	pbe_cnt = 0;

	shift = umem->page_shift;

	/* How many pg_shift-sized entries one umem page expands to */
	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		/* Number of umem pages covered by this scatterlist element */
		pages = sg_dma_len(sg) >> shift;
		pg_addr = sg_dma_address(sg);
		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
			/* fw_pg_cnt is advanced at the bottom of the body */
			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
				/* Store the address as low/high 32-bit halves */
				pbe->lo = cpu_to_le32(pg_addr);
				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));

				pg_addr += BIT(pg_shift);
				pbe_cnt++;
				total_num_pbes++;
				pbe++;

				/* All requested entries written: done, even if
				 * the umem has more pages left.
				 */
				if (total_num_pbes == pbl_info->num_pbes)
					return;

				/* If the given pbl is full storing the pbes,
				 * move to next pbl.
				 */
				if (pbe_cnt ==
				    (pbl_info->pbl_size / sizeof(u64))) {
					pbl_tbl++;
					pbe = (struct regpair *)pbl_tbl->va;
					pbe_cnt = 0;
				}

				fw_pg_cnt++;
			}
		}
	}
}
695 
696 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
697 			      struct qedr_cq *cq, struct ib_udata *udata)
698 {
699 	struct qedr_create_cq_uresp uresp;
700 	int rc;
701 
702 	memset(&uresp, 0, sizeof(uresp));
703 
704 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
705 	uresp.icid = cq->icid;
706 
707 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
708 	if (rc)
709 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
710 
711 	return rc;
712 }
713 
714 static void consume_cqe(struct qedr_cq *cq)
715 {
716 	if (cq->latest_cqe == cq->toggle_cqe)
717 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
718 
719 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
720 }
721 
722 static inline int qedr_align_cq_entries(int entries)
723 {
724 	u64 size, aligned_size;
725 
726 	/* We allocate an extra entry that we don't report to the FW. */
727 	size = (entries + 1) * QEDR_CQE_SIZE;
728 	aligned_size = ALIGN(size, PAGE_SIZE);
729 
730 	return aligned_size / QEDR_CQE_SIZE;
731 }
732 
733 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
734 				       struct qedr_dev *dev,
735 				       struct qedr_userq *q,
736 				       u64 buf_addr, size_t buf_len,
737 				       int access, int dmasync,
738 				       int alloc_and_init)
739 {
740 	u32 fw_pages;
741 	int rc;
742 
743 	q->buf_addr = buf_addr;
744 	q->buf_len = buf_len;
745 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
746 	if (IS_ERR(q->umem)) {
747 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
748 		       PTR_ERR(q->umem));
749 		return PTR_ERR(q->umem);
750 	}
751 
752 	fw_pages = ib_umem_page_count(q->umem) <<
753 	    (q->umem->page_shift - FW_PAGE_SHIFT);
754 
755 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
756 	if (rc)
757 		goto err0;
758 
759 	if (alloc_and_init) {
760 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
761 		if (IS_ERR(q->pbl_tbl)) {
762 			rc = PTR_ERR(q->pbl_tbl);
763 			goto err0;
764 		}
765 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
766 				   FW_PAGE_SHIFT);
767 	} else {
768 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
769 		if (!q->pbl_tbl) {
770 			rc = -ENOMEM;
771 			goto err0;
772 		}
773 	}
774 
775 	return 0;
776 
777 err0:
778 	ib_umem_release(q->umem);
779 	q->umem = NULL;
780 
781 	return rc;
782 }
783 
784 static inline void qedr_init_cq_params(struct qedr_cq *cq,
785 				       struct qedr_ucontext *ctx,
786 				       struct qedr_dev *dev, int vector,
787 				       int chain_entries, int page_cnt,
788 				       u64 pbl_ptr,
789 				       struct qed_rdma_create_cq_in_params
790 				       *params)
791 {
792 	memset(params, 0, sizeof(*params));
793 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
794 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
795 	params->cnq_id = vector;
796 	params->cq_size = chain_entries - 1;
797 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
798 	params->pbl_num_pages = page_cnt;
799 	params->pbl_ptr = pbl_ptr;
800 	params->pbl_two_level = 0;
801 }
802 
/* Ring the CQ doorbell: store @flags and the consumer value @cons in the
 * CQ's doorbell record and write the whole record to cq->db_addr with a
 * single 64-bit MMIO write.
 */
static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
	cq->db.data.agg_flags = flags;
	cq->db.data.value = cpu_to_le32(cons);
	writeq(cq->db.raw, cq->db_addr);

	/* Make sure write would stick */
	mmiowb();
}
812 
813 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
814 {
815 	struct qedr_cq *cq = get_qedr_cq(ibcq);
816 	unsigned long sflags;
817 	struct qedr_dev *dev;
818 
819 	dev = get_qedr_dev(ibcq->device);
820 
821 	if (cq->destroyed) {
822 		DP_ERR(dev,
823 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
824 		       cq, cq->icid);
825 		return -EINVAL;
826 	}
827 
828 
829 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
830 		return 0;
831 
832 	spin_lock_irqsave(&cq->cq_lock, sflags);
833 
834 	cq->arm_flags = 0;
835 
836 	if (flags & IB_CQ_SOLICITED)
837 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
838 
839 	if (flags & IB_CQ_NEXT_COMP)
840 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
841 
842 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
843 
844 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
845 
846 	return 0;
847 }
848 
849 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
850 			     const struct ib_cq_init_attr *attr,
851 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
852 {
853 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
854 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
855 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
856 	struct qedr_dev *dev = get_qedr_dev(ibdev);
857 	struct qed_rdma_create_cq_in_params params;
858 	struct qedr_create_cq_ureq ureq;
859 	int vector = attr->comp_vector;
860 	int entries = attr->cqe;
861 	struct qedr_cq *cq;
862 	int chain_entries;
863 	int page_cnt;
864 	u64 pbl_ptr;
865 	u16 icid;
866 	int rc;
867 
868 	DP_DEBUG(dev, QEDR_MSG_INIT,
869 		 "create_cq: called from %s. entries=%d, vector=%d\n",
870 		 udata ? "User Lib" : "Kernel", entries, vector);
871 
872 	if (entries > QEDR_MAX_CQES) {
873 		DP_ERR(dev,
874 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
875 		       entries, QEDR_MAX_CQES);
876 		return ERR_PTR(-EINVAL);
877 	}
878 
879 	chain_entries = qedr_align_cq_entries(entries);
880 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
881 
882 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
883 	if (!cq)
884 		return ERR_PTR(-ENOMEM);
885 
886 	if (udata) {
887 		memset(&ureq, 0, sizeof(ureq));
888 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
889 			DP_ERR(dev,
890 			       "create cq: problem copying data from user space\n");
891 			goto err0;
892 		}
893 
894 		if (!ureq.len) {
895 			DP_ERR(dev,
896 			       "create cq: cannot create a cq with 0 entries\n");
897 			goto err0;
898 		}
899 
900 		cq->cq_type = QEDR_CQ_TYPE_USER;
901 
902 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
903 					  ureq.len, IB_ACCESS_LOCAL_WRITE,
904 					  1, 1);
905 		if (rc)
906 			goto err0;
907 
908 		pbl_ptr = cq->q.pbl_tbl->pa;
909 		page_cnt = cq->q.pbl_info.num_pbes;
910 
911 		cq->ibcq.cqe = chain_entries;
912 	} else {
913 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
914 
915 		rc = dev->ops->common->chain_alloc(dev->cdev,
916 						   QED_CHAIN_USE_TO_CONSUME,
917 						   QED_CHAIN_MODE_PBL,
918 						   QED_CHAIN_CNT_TYPE_U32,
919 						   chain_entries,
920 						   sizeof(union rdma_cqe),
921 						   &cq->pbl, NULL);
922 		if (rc)
923 			goto err1;
924 
925 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
926 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
927 		cq->ibcq.cqe = cq->pbl.capacity;
928 	}
929 
930 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
931 			    pbl_ptr, &params);
932 
933 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
934 	if (rc)
935 		goto err2;
936 
937 	cq->icid = icid;
938 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
939 	spin_lock_init(&cq->cq_lock);
940 
941 	if (ib_ctx) {
942 		rc = qedr_copy_cq_uresp(dev, cq, udata);
943 		if (rc)
944 			goto err3;
945 	} else {
946 		/* Generate doorbell address. */
947 		cq->db_addr = dev->db_addr +
948 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
949 		cq->db.data.icid = cq->icid;
950 		cq->db.data.params = DB_AGG_CMD_SET <<
951 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
952 
953 		/* point to the very last element, passing it we will toggle */
954 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
955 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
956 		cq->latest_cqe = NULL;
957 		consume_cqe(cq);
958 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
959 	}
960 
961 	DP_DEBUG(dev, QEDR_MSG_CQ,
962 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
963 		 cq->icid, cq, params.cq_size);
964 
965 	return &cq->ibcq;
966 
967 err3:
968 	destroy_iparams.icid = cq->icid;
969 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
970 				  &destroy_oparams);
971 err2:
972 	if (udata)
973 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
974 	else
975 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
976 err1:
977 	if (udata)
978 		ib_umem_release(cq->q.umem);
979 err0:
980 	kfree(cq);
981 	return ERR_PTR(-EINVAL);
982 }
983 
984 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
985 {
986 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
987 	struct qedr_cq *cq = get_qedr_cq(ibcq);
988 
989 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
990 
991 	return 0;
992 }
993 
/* Polling budget used by qedr_destroy_cq() while waiting for outstanding
 * CNQ notifications: the iteration count applies to each of its two wait
 * loops, and the duration is passed to udelay() in the first loop and to
 * msleep() in the second.
 */
#define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
#define QEDR_DESTROY_CQ_ITER_DURATION		(10)

/* Destroy a completion queue.
 *
 * The CQ is marked destroyed first. GSI CQs are then simply freed. For
 * all other CQs the CQ is destroyed in the qed core, the chain is freed,
 * user-owned resources (PBL and umem) are released when the CQ has a user
 * object with a context, and the function waits until the number of CNQ
 * notifications seen for the CQ matches the number reported by the
 * destroy operation.
 *
 * Returns 0 on success, the error from rdma_destroy_cq(), or -EINVAL when
 * the notification counts never match; in both error cases the qedr_cq
 * structure is not freed.
 */
int qedr_destroy_cq(struct ib_cq *ibcq)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qed_rdma_destroy_cq_out_params oparams;
	struct qed_rdma_destroy_cq_in_params iparams;
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	int iter;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);

	/* qedr_arm_cq() refuses to arm a CQ once this is set */
	cq->destroyed = 1;

	/* GSIs CQs are handled by driver, so they don't exist in the FW */
	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		goto done;

	iparams.icid = cq->icid;
	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
	if (rc)
		return rc;

	dev->ops->common->chain_free(dev->cdev, &cq->pbl);

	if (ibcq->uobject && ibcq->uobject->context) {
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
		ib_umem_release(cq->q.umem);
	}

	/* We don't want the IRQ handler to handle a non-existing CQ so we
	 * wait until all CNQ interrupts, if any, are received. This will always
	 * happen and will always happen very fast. If not, then a serious error
	 * has occurred. That is why we can use a long delay.
	 * We spin for a short time so we don't lose time on context switching
	 * in case all the completions are handled in that span. Otherwise
	 * we sleep for a while and check again. Since the CNQ may be
	 * associated with (only) the current CPU we use msleep to allow the
	 * current CPU to be freed.
	 * The CNQ notification is increased in qedr_irq_handler().
	 */
	/* Phase 1: busy-wait */
	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
		iter--;
	}

	/* Phase 2: sleep between checks */
	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
		iter--;
	}

	if (oparams.num_cq_notif != cq->cnq_notif)
		goto err;

	/* Note that we don't need to have explicit code to wait for the
	 * completion of the event handler because it is invoked from the EQ.
	 * Since the destroy CQ ramrod has also been received on the EQ we can
	 * be certain that there's no event handler in process.
	 */
done:
	/* Invalidate the signature before the memory is released */
	cq->sig = ~cq->sig;

	kfree(cq);

	return 0;

err:
	DP_ERR(dev,
	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);

	return -EINVAL;
}
1071 
1072 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1073 					  struct ib_qp_attr *attr,
1074 					  int attr_mask,
1075 					  struct qed_rdma_modify_qp_in_params
1076 					  *qp_params)
1077 {
1078 	enum rdma_network_type nw_type;
1079 	struct ib_gid_attr gid_attr;
1080 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1081 	union ib_gid gid;
1082 	u32 ipv4_addr;
1083 	int rc = 0;
1084 	int i;
1085 
1086 	rc = ib_get_cached_gid(ibqp->device,
1087 			       rdma_ah_get_port_num(&attr->ah_attr),
1088 			       grh->sgid_index, &gid, &gid_attr);
1089 	if (rc)
1090 		return rc;
1091 
1092 	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1093 
1094 	dev_put(gid_attr.ndev);
1095 	nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1096 	switch (nw_type) {
1097 	case RDMA_NETWORK_IPV6:
1098 		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1099 		       sizeof(qp_params->sgid));
1100 		memcpy(&qp_params->dgid.bytes[0],
1101 		       &grh->dgid,
1102 		       sizeof(qp_params->dgid));
1103 		qp_params->roce_mode = ROCE_V2_IPV6;
1104 		SET_FIELD(qp_params->modify_flags,
1105 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1106 		break;
1107 	case RDMA_NETWORK_IB:
1108 		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1109 		       sizeof(qp_params->sgid));
1110 		memcpy(&qp_params->dgid.bytes[0],
1111 		       &grh->dgid,
1112 		       sizeof(qp_params->dgid));
1113 		qp_params->roce_mode = ROCE_V1;
1114 		break;
1115 	case RDMA_NETWORK_IPV4:
1116 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1117 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1118 		ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1119 		qp_params->sgid.ipv4_addr = ipv4_addr;
1120 		ipv4_addr =
1121 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1122 		qp_params->dgid.ipv4_addr = ipv4_addr;
1123 		SET_FIELD(qp_params->modify_flags,
1124 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1125 		qp_params->roce_mode = ROCE_V2_IPV4;
1126 		break;
1127 	}
1128 
1129 	for (i = 0; i < 4; i++) {
1130 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1131 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1132 	}
1133 
1134 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1135 		qp_params->vlan_id = 0;
1136 
1137 	return 0;
1138 }
1139 
1140 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1141 			       struct ib_qp_init_attr *attrs)
1142 {
1143 	struct qedr_device_attr *qattr = &dev->attr;
1144 
1145 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1146 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1147 		DP_DEBUG(dev, QEDR_MSG_QP,
1148 			 "create qp: unsupported qp type=0x%x requested\n",
1149 			 attrs->qp_type);
1150 		return -EINVAL;
1151 	}
1152 
1153 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1154 		DP_ERR(dev,
1155 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1156 		       attrs->cap.max_send_wr, qattr->max_sqe);
1157 		return -EINVAL;
1158 	}
1159 
1160 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1161 		DP_ERR(dev,
1162 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1163 		       attrs->cap.max_inline_data, qattr->max_inline);
1164 		return -EINVAL;
1165 	}
1166 
1167 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1168 		DP_ERR(dev,
1169 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1170 		       attrs->cap.max_send_sge, qattr->max_sge);
1171 		return -EINVAL;
1172 	}
1173 
1174 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1175 		DP_ERR(dev,
1176 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1177 		       attrs->cap.max_recv_sge, qattr->max_sge);
1178 		return -EINVAL;
1179 	}
1180 
1181 	/* Unprivileged user space cannot create special QP */
1182 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1183 		DP_ERR(dev,
1184 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1185 		       attrs->qp_type);
1186 		return -EINVAL;
1187 	}
1188 
1189 	return 0;
1190 }
1191 
1192 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1193 			       struct qedr_create_qp_uresp *uresp,
1194 			       struct qedr_qp *qp)
1195 {
1196 	/* iWARP requires two doorbells per RQ. */
1197 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1198 		uresp->rq_db_offset =
1199 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1200 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1201 	} else {
1202 		uresp->rq_db_offset =
1203 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1204 	}
1205 
1206 	uresp->rq_icid = qp->icid;
1207 }
1208 
1209 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1210 			       struct qedr_create_qp_uresp *uresp,
1211 			       struct qedr_qp *qp)
1212 {
1213 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1214 
1215 	/* iWARP uses the same cid for rq and sq */
1216 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1217 		uresp->sq_icid = qp->icid;
1218 	else
1219 		uresp->sq_icid = qp->icid + 1;
1220 }
1221 
1222 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1223 			      struct qedr_qp *qp, struct ib_udata *udata)
1224 {
1225 	struct qedr_create_qp_uresp uresp;
1226 	int rc;
1227 
1228 	memset(&uresp, 0, sizeof(uresp));
1229 	qedr_copy_sq_uresp(dev, &uresp, qp);
1230 	qedr_copy_rq_uresp(dev, &uresp, qp);
1231 
1232 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1233 	uresp.qp_id = qp->qp_id;
1234 
1235 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1236 	if (rc)
1237 		DP_ERR(dev,
1238 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1239 		       qp->icid);
1240 
1241 	return rc;
1242 }
1243 
1244 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1245 				      struct qedr_qp *qp,
1246 				      struct qedr_pd *pd,
1247 				      struct ib_qp_init_attr *attrs)
1248 {
1249 	spin_lock_init(&qp->q_lock);
1250 	atomic_set(&qp->refcnt, 1);
1251 	qp->pd = pd;
1252 	qp->qp_type = attrs->qp_type;
1253 	qp->max_inline_data = attrs->cap.max_inline_data;
1254 	qp->sq.max_sges = attrs->cap.max_send_sge;
1255 	qp->state = QED_ROCE_QP_STATE_RESET;
1256 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1257 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1258 	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1259 	qp->dev = dev;
1260 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1261 
1262 	DP_DEBUG(dev, QEDR_MSG_QP,
1263 		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1264 		 qp->rq.max_sges, qp->rq_cq->icid);
1265 	DP_DEBUG(dev, QEDR_MSG_QP,
1266 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1267 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1268 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1269 	DP_DEBUG(dev, QEDR_MSG_QP,
1270 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1271 		 qp->sq.max_sges, qp->sq_cq->icid);
1272 }
1273 
1274 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1275 {
1276 	qp->sq.db = dev->db_addr +
1277 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1278 	qp->sq.db_data.data.icid = qp->icid + 1;
1279 	qp->rq.db = dev->db_addr +
1280 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1281 	qp->rq.db_data.data.icid = qp->icid;
1282 }
1283 
1284 static inline void
1285 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1286 			      struct qedr_pd *pd,
1287 			      struct qedr_qp *qp,
1288 			      struct ib_qp_init_attr *attrs,
1289 			      bool fmr_and_reserved_lkey,
1290 			      struct qed_rdma_create_qp_in_params *params)
1291 {
1292 	/* QP handle to be written in an async event */
1293 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1294 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1295 
1296 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1297 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1298 	params->pd = pd->pd_id;
1299 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1300 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1301 	params->stats_queue = 0;
1302 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1303 	params->srq_id = 0;
1304 	params->use_srq = false;
1305 }
1306 
1307 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1308 {
1309 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1310 		 "qp=%p. "
1311 		 "sq_addr=0x%llx, "
1312 		 "sq_len=%zd, "
1313 		 "rq_addr=0x%llx, "
1314 		 "rq_len=%zd"
1315 		 "\n",
1316 		 qp,
1317 		 qp->usq.buf_addr,
1318 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1319 }
1320 
1321 static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
1322 {
1323 	int rc;
1324 
1325 	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
1326 		return 0;
1327 
1328 	idr_preload(GFP_KERNEL);
1329 	spin_lock_irq(&dev->idr_lock);
1330 
1331 	rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);
1332 
1333 	spin_unlock_irq(&dev->idr_lock);
1334 	idr_preload_end();
1335 
1336 	return rc < 0 ? rc : 0;
1337 }
1338 
1339 static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
1340 {
1341 	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
1342 		return;
1343 
1344 	spin_lock_irq(&dev->idr_lock);
1345 	idr_remove(&dev->qpidr, id);
1346 	spin_unlock_irq(&dev->idr_lock);
1347 }
1348 
1349 static inline void
1350 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1351 			    struct qedr_qp *qp,
1352 			    struct qed_rdma_create_qp_out_params *out_params)
1353 {
1354 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1355 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1356 
1357 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1358 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1359 
1360 	qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1361 	qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1362 
1363 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1364 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1365 }
1366 
1367 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1368 {
1369 	if (qp->usq.umem)
1370 		ib_umem_release(qp->usq.umem);
1371 	qp->usq.umem = NULL;
1372 
1373 	if (qp->urq.umem)
1374 		ib_umem_release(qp->urq.umem);
1375 	qp->urq.umem = NULL;
1376 }
1377 
1378 static int qedr_create_user_qp(struct qedr_dev *dev,
1379 			       struct qedr_qp *qp,
1380 			       struct ib_pd *ibpd,
1381 			       struct ib_udata *udata,
1382 			       struct ib_qp_init_attr *attrs)
1383 {
1384 	struct qed_rdma_create_qp_in_params in_params;
1385 	struct qed_rdma_create_qp_out_params out_params;
1386 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1387 	struct ib_ucontext *ib_ctx = NULL;
1388 	struct qedr_create_qp_ureq ureq;
1389 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1390 	int rc = -EINVAL;
1391 
1392 	ib_ctx = ibpd->uobject->context;
1393 
1394 	memset(&ureq, 0, sizeof(ureq));
1395 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1396 	if (rc) {
1397 		DP_ERR(dev, "Problem copying data from user space\n");
1398 		return rc;
1399 	}
1400 
1401 	/* SQ - read access only (0), dma sync not required (0) */
1402 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1403 				  ureq.sq_len, 0, 0, alloc_and_init);
1404 	if (rc)
1405 		return rc;
1406 
1407 	/* RQ - read access only (0), dma sync not required (0) */
1408 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1409 				  ureq.rq_len, 0, 0, alloc_and_init);
1410 	if (rc)
1411 		return rc;
1412 
1413 	memset(&in_params, 0, sizeof(in_params));
1414 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1415 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1416 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1417 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1418 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1419 	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1420 	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1421 
1422 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1423 					      &in_params, &out_params);
1424 
1425 	if (!qp->qed_qp) {
1426 		rc = -ENOMEM;
1427 		goto err1;
1428 	}
1429 
1430 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1431 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1432 
1433 	qp->qp_id = out_params.qp_id;
1434 	qp->icid = out_params.icid;
1435 
1436 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1437 	if (rc)
1438 		goto err;
1439 
1440 	qedr_qp_user_print(dev, qp);
1441 
1442 	return 0;
1443 err:
1444 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1445 	if (rc)
1446 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1447 
1448 err1:
1449 	qedr_cleanup_user(dev, qp);
1450 	return rc;
1451 }
1452 
1453 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1454 {
1455 	qp->sq.db = dev->db_addr +
1456 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1457 	qp->sq.db_data.data.icid = qp->icid;
1458 
1459 	qp->rq.db = dev->db_addr +
1460 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1461 	qp->rq.db_data.data.icid = qp->icid;
1462 	qp->rq.iwarp_db2 = dev->db_addr +
1463 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1464 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1465 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1466 }
1467 
1468 static int
1469 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1470 			   struct qedr_qp *qp,
1471 			   struct qed_rdma_create_qp_in_params *in_params,
1472 			   u32 n_sq_elems, u32 n_rq_elems)
1473 {
1474 	struct qed_rdma_create_qp_out_params out_params;
1475 	int rc;
1476 
1477 	rc = dev->ops->common->chain_alloc(dev->cdev,
1478 					   QED_CHAIN_USE_TO_PRODUCE,
1479 					   QED_CHAIN_MODE_PBL,
1480 					   QED_CHAIN_CNT_TYPE_U32,
1481 					   n_sq_elems,
1482 					   QEDR_SQE_ELEMENT_SIZE,
1483 					   &qp->sq.pbl, NULL);
1484 
1485 	if (rc)
1486 		return rc;
1487 
1488 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1489 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1490 
1491 	rc = dev->ops->common->chain_alloc(dev->cdev,
1492 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1493 					   QED_CHAIN_MODE_PBL,
1494 					   QED_CHAIN_CNT_TYPE_U32,
1495 					   n_rq_elems,
1496 					   QEDR_RQE_ELEMENT_SIZE,
1497 					   &qp->rq.pbl, NULL);
1498 	if (rc)
1499 		return rc;
1500 
1501 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1502 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1503 
1504 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1505 					      in_params, &out_params);
1506 
1507 	if (!qp->qed_qp)
1508 		return -EINVAL;
1509 
1510 	qp->qp_id = out_params.qp_id;
1511 	qp->icid = out_params.icid;
1512 
1513 	qedr_set_roce_db_info(dev, qp);
1514 	return rc;
1515 }
1516 
1517 static int
1518 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1519 			    struct qedr_qp *qp,
1520 			    struct qed_rdma_create_qp_in_params *in_params,
1521 			    u32 n_sq_elems, u32 n_rq_elems)
1522 {
1523 	struct qed_rdma_create_qp_out_params out_params;
1524 	struct qed_chain_ext_pbl ext_pbl;
1525 	int rc;
1526 
1527 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1528 						     QEDR_SQE_ELEMENT_SIZE,
1529 						     QED_CHAIN_MODE_PBL);
1530 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1531 						     QEDR_RQE_ELEMENT_SIZE,
1532 						     QED_CHAIN_MODE_PBL);
1533 
1534 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1535 					      in_params, &out_params);
1536 
1537 	if (!qp->qed_qp)
1538 		return -EINVAL;
1539 
1540 	/* Now we allocate the chain */
1541 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1542 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1543 
1544 	rc = dev->ops->common->chain_alloc(dev->cdev,
1545 					   QED_CHAIN_USE_TO_PRODUCE,
1546 					   QED_CHAIN_MODE_PBL,
1547 					   QED_CHAIN_CNT_TYPE_U32,
1548 					   n_sq_elems,
1549 					   QEDR_SQE_ELEMENT_SIZE,
1550 					   &qp->sq.pbl, &ext_pbl);
1551 
1552 	if (rc)
1553 		goto err;
1554 
1555 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1556 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1557 
1558 	rc = dev->ops->common->chain_alloc(dev->cdev,
1559 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1560 					   QED_CHAIN_MODE_PBL,
1561 					   QED_CHAIN_CNT_TYPE_U32,
1562 					   n_rq_elems,
1563 					   QEDR_RQE_ELEMENT_SIZE,
1564 					   &qp->rq.pbl, &ext_pbl);
1565 
1566 	if (rc)
1567 		goto err;
1568 
1569 	qp->qp_id = out_params.qp_id;
1570 	qp->icid = out_params.icid;
1571 
1572 	qedr_set_iwarp_db_info(dev, qp);
1573 	return rc;
1574 
1575 err:
1576 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1577 
1578 	return rc;
1579 }
1580 
1581 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1582 {
1583 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1584 	kfree(qp->wqe_wr_id);
1585 
1586 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1587 	kfree(qp->rqe_wr_id);
1588 }
1589 
1590 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1591 				 struct qedr_qp *qp,
1592 				 struct ib_pd *ibpd,
1593 				 struct ib_qp_init_attr *attrs)
1594 {
1595 	struct qed_rdma_create_qp_in_params in_params;
1596 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1597 	int rc = -EINVAL;
1598 	u32 n_rq_elems;
1599 	u32 n_sq_elems;
1600 	u32 n_sq_entries;
1601 
1602 	memset(&in_params, 0, sizeof(in_params));
1603 
1604 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1605 	 * the ring. The ring should allow at least a single WR, even if the
1606 	 * user requested none, due to allocation issues.
1607 	 * We should add an extra WR since the prod and cons indices of
1608 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1609 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1610 	 * double the number of entries due an iSER issue that pushes far more
1611 	 * WRs than indicated. If we decline its ib_post_send() then we get
1612 	 * error prints in the dmesg we'd like to avoid.
1613 	 */
1614 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1615 			      dev->attr.max_sqe);
1616 
1617 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1618 				GFP_KERNEL);
1619 	if (!qp->wqe_wr_id) {
1620 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1621 		return -ENOMEM;
1622 	}
1623 
1624 	/* QP handle to be written in CQE */
1625 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1626 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1627 
1628 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1629 	 * the ring. There ring should allow at least a single WR, even if the
1630 	 * user requested none, due to allocation issues.
1631 	 */
1632 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1633 
1634 	/* Allocate driver internal RQ array */
1635 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1636 				GFP_KERNEL);
1637 	if (!qp->rqe_wr_id) {
1638 		DP_ERR(dev,
1639 		       "create qp: failed RQ shadow memory allocation\n");
1640 		kfree(qp->wqe_wr_id);
1641 		return -ENOMEM;
1642 	}
1643 
1644 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1645 
1646 	n_sq_entries = attrs->cap.max_send_wr;
1647 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1648 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1649 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1650 
1651 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1652 
1653 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1654 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1655 						 n_sq_elems, n_rq_elems);
1656 	else
1657 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1658 						n_sq_elems, n_rq_elems);
1659 	if (rc)
1660 		qedr_cleanup_kernel(dev, qp);
1661 
1662 	return rc;
1663 }
1664 
1665 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1666 			     struct ib_qp_init_attr *attrs,
1667 			     struct ib_udata *udata)
1668 {
1669 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1670 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1671 	struct qedr_qp *qp;
1672 	struct ib_qp *ibqp;
1673 	int rc = 0;
1674 
1675 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1676 		 udata ? "user library" : "kernel", pd);
1677 
1678 	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1679 	if (rc)
1680 		return ERR_PTR(rc);
1681 
1682 	if (attrs->srq)
1683 		return ERR_PTR(-EINVAL);
1684 
1685 	DP_DEBUG(dev, QEDR_MSG_QP,
1686 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1687 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1688 		 get_qedr_cq(attrs->send_cq),
1689 		 get_qedr_cq(attrs->send_cq)->icid,
1690 		 get_qedr_cq(attrs->recv_cq),
1691 		 get_qedr_cq(attrs->recv_cq)->icid);
1692 
1693 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1694 	if (!qp) {
1695 		DP_ERR(dev, "create qp: failed allocating memory\n");
1696 		return ERR_PTR(-ENOMEM);
1697 	}
1698 
1699 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1700 
1701 	if (attrs->qp_type == IB_QPT_GSI) {
1702 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1703 		if (IS_ERR(ibqp))
1704 			kfree(qp);
1705 		return ibqp;
1706 	}
1707 
1708 	if (udata)
1709 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1710 	else
1711 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1712 
1713 	if (rc)
1714 		goto err;
1715 
1716 	qp->ibqp.qp_num = qp->qp_id;
1717 
1718 	rc = qedr_idr_add(dev, qp, qp->qp_id);
1719 	if (rc)
1720 		goto err;
1721 
1722 	return &qp->ibqp;
1723 
1724 err:
1725 	kfree(qp);
1726 
1727 	return ERR_PTR(-EFAULT);
1728 }
1729 
1730 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1731 {
1732 	switch (qp_state) {
1733 	case QED_ROCE_QP_STATE_RESET:
1734 		return IB_QPS_RESET;
1735 	case QED_ROCE_QP_STATE_INIT:
1736 		return IB_QPS_INIT;
1737 	case QED_ROCE_QP_STATE_RTR:
1738 		return IB_QPS_RTR;
1739 	case QED_ROCE_QP_STATE_RTS:
1740 		return IB_QPS_RTS;
1741 	case QED_ROCE_QP_STATE_SQD:
1742 		return IB_QPS_SQD;
1743 	case QED_ROCE_QP_STATE_ERR:
1744 		return IB_QPS_ERR;
1745 	case QED_ROCE_QP_STATE_SQE:
1746 		return IB_QPS_SQE;
1747 	}
1748 	return IB_QPS_ERR;
1749 }
1750 
1751 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1752 					enum ib_qp_state qp_state)
1753 {
1754 	switch (qp_state) {
1755 	case IB_QPS_RESET:
1756 		return QED_ROCE_QP_STATE_RESET;
1757 	case IB_QPS_INIT:
1758 		return QED_ROCE_QP_STATE_INIT;
1759 	case IB_QPS_RTR:
1760 		return QED_ROCE_QP_STATE_RTR;
1761 	case IB_QPS_RTS:
1762 		return QED_ROCE_QP_STATE_RTS;
1763 	case IB_QPS_SQD:
1764 		return QED_ROCE_QP_STATE_SQD;
1765 	case IB_QPS_ERR:
1766 		return QED_ROCE_QP_STATE_ERR;
1767 	default:
1768 		return QED_ROCE_QP_STATE_ERR;
1769 	}
1770 }
1771 
1772 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1773 {
1774 	qed_chain_reset(&qph->pbl);
1775 	qph->prod = 0;
1776 	qph->cons = 0;
1777 	qph->wqe_cons = 0;
1778 	qph->db_data.data.value = cpu_to_le16(0);
1779 }
1780 
1781 static int qedr_update_qp_state(struct qedr_dev *dev,
1782 				struct qedr_qp *qp,
1783 				enum qed_roce_qp_state cur_state,
1784 				enum qed_roce_qp_state new_state)
1785 {
1786 	int status = 0;
1787 
1788 	if (new_state == cur_state)
1789 		return 0;
1790 
1791 	switch (cur_state) {
1792 	case QED_ROCE_QP_STATE_RESET:
1793 		switch (new_state) {
1794 		case QED_ROCE_QP_STATE_INIT:
1795 			qp->prev_wqe_size = 0;
1796 			qedr_reset_qp_hwq_info(&qp->sq);
1797 			qedr_reset_qp_hwq_info(&qp->rq);
1798 			break;
1799 		default:
1800 			status = -EINVAL;
1801 			break;
1802 		};
1803 		break;
1804 	case QED_ROCE_QP_STATE_INIT:
1805 		switch (new_state) {
1806 		case QED_ROCE_QP_STATE_RTR:
1807 			/* Update doorbell (in case post_recv was
1808 			 * done before move to RTR)
1809 			 */
1810 
1811 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
1812 				writel(qp->rq.db_data.raw, qp->rq.db);
1813 				/* Make sure write takes effect */
1814 				mmiowb();
1815 			}
1816 			break;
1817 		case QED_ROCE_QP_STATE_ERR:
1818 			break;
1819 		default:
1820 			/* Invalid state change. */
1821 			status = -EINVAL;
1822 			break;
1823 		};
1824 		break;
1825 	case QED_ROCE_QP_STATE_RTR:
1826 		/* RTR->XXX */
1827 		switch (new_state) {
1828 		case QED_ROCE_QP_STATE_RTS:
1829 			break;
1830 		case QED_ROCE_QP_STATE_ERR:
1831 			break;
1832 		default:
1833 			/* Invalid state change. */
1834 			status = -EINVAL;
1835 			break;
1836 		};
1837 		break;
1838 	case QED_ROCE_QP_STATE_RTS:
1839 		/* RTS->XXX */
1840 		switch (new_state) {
1841 		case QED_ROCE_QP_STATE_SQD:
1842 			break;
1843 		case QED_ROCE_QP_STATE_ERR:
1844 			break;
1845 		default:
1846 			/* Invalid state change. */
1847 			status = -EINVAL;
1848 			break;
1849 		};
1850 		break;
1851 	case QED_ROCE_QP_STATE_SQD:
1852 		/* SQD->XXX */
1853 		switch (new_state) {
1854 		case QED_ROCE_QP_STATE_RTS:
1855 		case QED_ROCE_QP_STATE_ERR:
1856 			break;
1857 		default:
1858 			/* Invalid state change. */
1859 			status = -EINVAL;
1860 			break;
1861 		};
1862 		break;
1863 	case QED_ROCE_QP_STATE_ERR:
1864 		/* ERR->XXX */
1865 		switch (new_state) {
1866 		case QED_ROCE_QP_STATE_RESET:
1867 			if ((qp->rq.prod != qp->rq.cons) ||
1868 			    (qp->sq.prod != qp->sq.cons)) {
1869 				DP_NOTICE(dev,
1870 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1871 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1872 					  qp->sq.cons);
1873 				status = -EINVAL;
1874 			}
1875 			break;
1876 		default:
1877 			status = -EINVAL;
1878 			break;
1879 		};
1880 		break;
1881 	default:
1882 		status = -EINVAL;
1883 		break;
1884 	};
1885 
1886 	return status;
1887 }
1888 
1889 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1890 		   int attr_mask, struct ib_udata *udata)
1891 {
1892 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1893 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1894 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1895 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1896 	enum ib_qp_state old_qp_state, new_qp_state;
1897 	enum qed_roce_qp_state cur_state;
1898 	int rc = 0;
1899 
1900 	DP_DEBUG(dev, QEDR_MSG_QP,
1901 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
1902 		 attr->qp_state);
1903 
1904 	old_qp_state = qedr_get_ibqp_state(qp->state);
1905 	if (attr_mask & IB_QP_STATE)
1906 		new_qp_state = attr->qp_state;
1907 	else
1908 		new_qp_state = old_qp_state;
1909 
1910 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1911 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
1912 					ibqp->qp_type, attr_mask,
1913 					IB_LINK_LAYER_ETHERNET)) {
1914 			DP_ERR(dev,
1915 			       "modify qp: invalid attribute mask=0x%x specified for\n"
1916 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1917 			       attr_mask, qp->qp_id, ibqp->qp_type,
1918 			       old_qp_state, new_qp_state);
1919 			rc = -EINVAL;
1920 			goto err;
1921 		}
1922 	}
1923 
1924 	/* Translate the masks... */
1925 	if (attr_mask & IB_QP_STATE) {
1926 		SET_FIELD(qp_params.modify_flags,
1927 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1928 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1929 	}
1930 
1931 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1932 		qp_params.sqd_async = true;
1933 
1934 	if (attr_mask & IB_QP_PKEY_INDEX) {
1935 		SET_FIELD(qp_params.modify_flags,
1936 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1937 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1938 			rc = -EINVAL;
1939 			goto err;
1940 		}
1941 
1942 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1943 	}
1944 
1945 	if (attr_mask & IB_QP_QKEY)
1946 		qp->qkey = attr->qkey;
1947 
1948 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1949 		SET_FIELD(qp_params.modify_flags,
1950 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1951 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1952 						  IB_ACCESS_REMOTE_READ;
1953 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1954 						   IB_ACCESS_REMOTE_WRITE;
1955 		qp_params.incoming_atomic_en = attr->qp_access_flags &
1956 					       IB_ACCESS_REMOTE_ATOMIC;
1957 	}
1958 
1959 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1960 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
1961 			return -EINVAL;
1962 
1963 		if (attr_mask & IB_QP_PATH_MTU) {
1964 			if (attr->path_mtu < IB_MTU_256 ||
1965 			    attr->path_mtu > IB_MTU_4096) {
1966 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1967 				rc = -EINVAL;
1968 				goto err;
1969 			}
1970 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1971 				      ib_mtu_enum_to_int(iboe_get_mtu
1972 							 (dev->ndev->mtu)));
1973 		}
1974 
1975 		if (!qp->mtu) {
1976 			qp->mtu =
1977 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1978 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1979 		}
1980 
1981 		SET_FIELD(qp_params.modify_flags,
1982 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1983 
1984 		qp_params.traffic_class_tos = grh->traffic_class;
1985 		qp_params.flow_label = grh->flow_label;
1986 		qp_params.hop_limit_ttl = grh->hop_limit;
1987 
1988 		qp->sgid_idx = grh->sgid_index;
1989 
1990 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1991 		if (rc) {
1992 			DP_ERR(dev,
1993 			       "modify qp: problems with GID index %d (rc=%d)\n",
1994 			       grh->sgid_index, rc);
1995 			return rc;
1996 		}
1997 
1998 		rc = qedr_get_dmac(dev, &attr->ah_attr,
1999 				   qp_params.remote_mac_addr);
2000 		if (rc)
2001 			return rc;
2002 
2003 		qp_params.use_local_mac = true;
2004 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2005 
2006 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2007 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2008 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2009 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2010 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2011 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2012 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2013 			 qp_params.remote_mac_addr);
2014 
2015 		qp_params.mtu = qp->mtu;
2016 		qp_params.lb_indication = false;
2017 	}
2018 
2019 	if (!qp_params.mtu) {
2020 		/* Stay with current MTU */
2021 		if (qp->mtu)
2022 			qp_params.mtu = qp->mtu;
2023 		else
2024 			qp_params.mtu =
2025 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2026 	}
2027 
2028 	if (attr_mask & IB_QP_TIMEOUT) {
2029 		SET_FIELD(qp_params.modify_flags,
2030 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2031 
2032 		/* The received timeout value is an exponent used like this:
2033 		 *    "12.7.34 LOCAL ACK TIMEOUT
2034 		 *    Value representing the transport (ACK) timeout for use by
2035 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2036 		 * The FW expects timeout in msec so we need to divide the usec
2037 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2038 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2039 		 * The value of zero means infinite so we use a 'max_t' to make
2040 		 * sure that sub 1 msec values will be configured as 1 msec.
2041 		 */
2042 		if (attr->timeout)
2043 			qp_params.ack_timeout =
2044 					1 << max_t(int, attr->timeout - 8, 0);
2045 		else
2046 			qp_params.ack_timeout = 0;
2047 	}
2048 
2049 	if (attr_mask & IB_QP_RETRY_CNT) {
2050 		SET_FIELD(qp_params.modify_flags,
2051 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2052 		qp_params.retry_cnt = attr->retry_cnt;
2053 	}
2054 
2055 	if (attr_mask & IB_QP_RNR_RETRY) {
2056 		SET_FIELD(qp_params.modify_flags,
2057 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2058 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2059 	}
2060 
2061 	if (attr_mask & IB_QP_RQ_PSN) {
2062 		SET_FIELD(qp_params.modify_flags,
2063 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2064 		qp_params.rq_psn = attr->rq_psn;
2065 		qp->rq_psn = attr->rq_psn;
2066 	}
2067 
2068 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2069 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2070 			rc = -EINVAL;
2071 			DP_ERR(dev,
2072 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2073 			       attr->max_rd_atomic,
2074 			       dev->attr.max_qp_req_rd_atomic_resc);
2075 			goto err;
2076 		}
2077 
2078 		SET_FIELD(qp_params.modify_flags,
2079 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2080 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2081 	}
2082 
2083 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2084 		SET_FIELD(qp_params.modify_flags,
2085 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2086 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2087 	}
2088 
2089 	if (attr_mask & IB_QP_SQ_PSN) {
2090 		SET_FIELD(qp_params.modify_flags,
2091 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2092 		qp_params.sq_psn = attr->sq_psn;
2093 		qp->sq_psn = attr->sq_psn;
2094 	}
2095 
2096 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2097 		if (attr->max_dest_rd_atomic >
2098 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2099 			DP_ERR(dev,
2100 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2101 			       attr->max_dest_rd_atomic,
2102 			       dev->attr.max_qp_resp_rd_atomic_resc);
2103 
2104 			rc = -EINVAL;
2105 			goto err;
2106 		}
2107 
2108 		SET_FIELD(qp_params.modify_flags,
2109 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2110 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2111 	}
2112 
2113 	if (attr_mask & IB_QP_DEST_QPN) {
2114 		SET_FIELD(qp_params.modify_flags,
2115 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2116 
2117 		qp_params.dest_qp = attr->dest_qp_num;
2118 		qp->dest_qp_num = attr->dest_qp_num;
2119 	}
2120 
2121 	cur_state = qp->state;
2122 
	/* Update the QP state before the actual ramrod to prevent a race with
	 * the fast path. Modifying the QP state to error causes the device to
	 * flush the CQEs, and a flushed CQE seen while polling is considered
	 * a potential issue if the QP isn't in the error state.
	 */
2128 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2129 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2130 		qp->state = QED_ROCE_QP_STATE_ERR;
2131 
2132 	if (qp->qp_type != IB_QPT_GSI)
2133 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2134 					      qp->qed_qp, &qp_params);
2135 
2136 	if (attr_mask & IB_QP_STATE) {
2137 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2138 			rc = qedr_update_qp_state(dev, qp, cur_state,
2139 						  qp_params.new_state);
2140 		qp->state = qp_params.new_state;
2141 	}
2142 
2143 err:
2144 	return rc;
2145 }
2146 
2147 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2148 {
2149 	int ib_qp_acc_flags = 0;
2150 
2151 	if (params->incoming_rdma_write_en)
2152 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2153 	if (params->incoming_rdma_read_en)
2154 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2155 	if (params->incoming_atomic_en)
2156 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2157 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2158 	return ib_qp_acc_flags;
2159 }
2160 
2161 int qedr_query_qp(struct ib_qp *ibqp,
2162 		  struct ib_qp_attr *qp_attr,
2163 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2164 {
2165 	struct qed_rdma_query_qp_out_params params;
2166 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2167 	struct qedr_dev *dev = qp->dev;
2168 	int rc = 0;
2169 
2170 	memset(&params, 0, sizeof(params));
2171 
2172 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2173 	if (rc)
2174 		goto err;
2175 
2176 	memset(qp_attr, 0, sizeof(*qp_attr));
2177 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2178 
2179 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2180 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2181 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2182 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2183 	qp_attr->rq_psn = params.rq_psn;
2184 	qp_attr->sq_psn = params.sq_psn;
2185 	qp_attr->dest_qp_num = params.dest_qp;
2186 
2187 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2188 
2189 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2190 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2191 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2192 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2193 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2194 	qp_init_attr->cap = qp_attr->cap;
2195 
2196 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2197 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2198 			params.flow_label, qp->sgid_idx,
2199 			params.hop_limit_ttl, params.traffic_class_tos);
2200 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2201 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2202 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2203 	qp_attr->timeout = params.timeout;
2204 	qp_attr->rnr_retry = params.rnr_retry;
2205 	qp_attr->retry_cnt = params.retry_cnt;
2206 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2207 	qp_attr->pkey_index = params.pkey_index;
2208 	qp_attr->port_num = 1;
2209 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2210 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2211 	qp_attr->alt_pkey_index = 0;
2212 	qp_attr->alt_port_num = 0;
2213 	qp_attr->alt_timeout = 0;
2214 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2215 
2216 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2217 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2218 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2219 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2220 
2221 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2222 		 qp_attr->cap.max_inline_data);
2223 
2224 err:
2225 	return rc;
2226 }
2227 
2228 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2229 {
2230 	int rc = 0;
2231 
2232 	if (qp->qp_type != IB_QPT_GSI) {
2233 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2234 		if (rc)
2235 			return rc;
2236 	}
2237 
2238 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2239 		qedr_cleanup_user(dev, qp);
2240 	else
2241 		qedr_cleanup_kernel(dev, qp);
2242 
2243 	return 0;
2244 }
2245 
2246 int qedr_destroy_qp(struct ib_qp *ibqp)
2247 {
2248 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2249 	struct qedr_dev *dev = qp->dev;
2250 	struct ib_qp_attr attr;
2251 	int attr_mask = 0;
2252 	int rc = 0;
2253 
2254 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2255 		 qp, qp->qp_type);
2256 
2257 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2258 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2259 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2260 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2261 
2262 			attr.qp_state = IB_QPS_ERR;
2263 			attr_mask |= IB_QP_STATE;
2264 
2265 			/* Change the QP state to ERROR */
2266 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2267 		}
2268 	} else {
2269 		/* Wait for the connect/accept to complete */
2270 		if (qp->ep) {
2271 			int wait_count = 1;
2272 
2273 			while (qp->ep->during_connect) {
2274 				DP_DEBUG(dev, QEDR_MSG_QP,
2275 					 "Still in during connect/accept\n");
2276 
2277 				msleep(100);
2278 				if (wait_count++ > 200) {
2279 					DP_NOTICE(dev,
2280 						  "during connect timeout\n");
2281 					break;
2282 				}
2283 			}
2284 		}
2285 	}
2286 
2287 	if (qp->qp_type == IB_QPT_GSI)
2288 		qedr_destroy_gsi_qp(dev);
2289 
2290 	qedr_free_qp_resources(dev, qp);
2291 
2292 	if (atomic_dec_and_test(&qp->refcnt)) {
2293 		qedr_idr_remove(dev, qp->qp_id);
2294 		kfree(qp);
2295 	}
2296 	return rc;
2297 }
2298 
2299 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2300 			     struct ib_udata *udata)
2301 {
2302 	struct qedr_ah *ah;
2303 
2304 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2305 	if (!ah)
2306 		return ERR_PTR(-ENOMEM);
2307 
2308 	ah->attr = *attr;
2309 
2310 	return &ah->ibah;
2311 }
2312 
/* Free the qedr address handle that embeds @ibah. Always returns 0. */
int qedr_destroy_ah(struct ib_ah *ibah)
{
	kfree(get_qedr_ah(ibah));

	return 0;
}
2320 
2321 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2322 {
2323 	struct qedr_pbl *pbl, *tmp;
2324 
2325 	if (info->pbl_table)
2326 		list_add_tail(&info->pbl_table->list_entry,
2327 			      &info->free_pbl_list);
2328 
2329 	if (!list_empty(&info->inuse_pbl_list))
2330 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2331 
2332 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2333 		list_del(&pbl->list_entry);
2334 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2335 	}
2336 }
2337 
2338 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2339 			size_t page_list_len, bool two_layered)
2340 {
2341 	struct qedr_pbl *tmp;
2342 	int rc;
2343 
2344 	INIT_LIST_HEAD(&info->free_pbl_list);
2345 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2346 
2347 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2348 				  page_list_len, two_layered);
2349 	if (rc)
2350 		goto done;
2351 
2352 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2353 	if (IS_ERR(info->pbl_table)) {
2354 		rc = PTR_ERR(info->pbl_table);
2355 		goto done;
2356 	}
2357 
2358 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2359 		 &info->pbl_table->pa);
2360 
2361 	/* in usual case we use 2 PBLs, so we add one to free
2362 	 * list and allocating another one
2363 	 */
2364 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2365 	if (IS_ERR(tmp)) {
2366 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2367 		goto done;
2368 	}
2369 
2370 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2371 
2372 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2373 
2374 done:
2375 	if (rc)
2376 		free_mr_info(dev, info);
2377 
2378 	return rc;
2379 }
2380 
2381 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2382 			       u64 usr_addr, int acc, struct ib_udata *udata)
2383 {
2384 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2385 	struct qedr_mr *mr;
2386 	struct qedr_pd *pd;
2387 	int rc = -ENOMEM;
2388 
2389 	pd = get_qedr_pd(ibpd);
2390 	DP_DEBUG(dev, QEDR_MSG_MR,
2391 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2392 		 pd->pd_id, start, len, usr_addr, acc);
2393 
2394 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2395 		return ERR_PTR(-EINVAL);
2396 
2397 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2398 	if (!mr)
2399 		return ERR_PTR(rc);
2400 
2401 	mr->type = QEDR_MR_USER;
2402 
2403 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2404 	if (IS_ERR(mr->umem)) {
2405 		rc = -EFAULT;
2406 		goto err0;
2407 	}
2408 
2409 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2410 	if (rc)
2411 		goto err1;
2412 
2413 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2414 			   &mr->info.pbl_info, mr->umem->page_shift);
2415 
2416 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2417 	if (rc) {
2418 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2419 		goto err1;
2420 	}
2421 
2422 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2423 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2424 	mr->hw_mr.key = 0;
2425 	mr->hw_mr.pd = pd->pd_id;
2426 	mr->hw_mr.local_read = 1;
2427 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2428 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2429 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2430 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2431 	mr->hw_mr.mw_bind = false;
2432 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2433 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2434 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2435 	mr->hw_mr.page_size_log = mr->umem->page_shift;
2436 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2437 	mr->hw_mr.length = len;
2438 	mr->hw_mr.vaddr = usr_addr;
2439 	mr->hw_mr.zbva = false;
2440 	mr->hw_mr.phy_mr = false;
2441 	mr->hw_mr.dma_mr = false;
2442 
2443 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2444 	if (rc) {
2445 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2446 		goto err2;
2447 	}
2448 
2449 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2450 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2451 	    mr->hw_mr.remote_atomic)
2452 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2453 
2454 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2455 		 mr->ibmr.lkey);
2456 	return &mr->ibmr;
2457 
2458 err2:
2459 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2460 err1:
2461 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2462 err0:
2463 	kfree(mr);
2464 	return ERR_PTR(rc);
2465 }
2466 
2467 int qedr_dereg_mr(struct ib_mr *ib_mr)
2468 {
2469 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2470 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2471 	int rc = 0;
2472 
2473 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2474 	if (rc)
2475 		return rc;
2476 
2477 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2478 
2479 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2480 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2481 
2482 	/* it could be user registered memory. */
2483 	if (mr->umem)
2484 		ib_umem_release(mr->umem);
2485 
2486 	kfree(mr);
2487 
2488 	return rc;
2489 }
2490 
2491 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2492 				       int max_page_list_len)
2493 {
2494 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2495 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2496 	struct qedr_mr *mr;
2497 	int rc = -ENOMEM;
2498 
2499 	DP_DEBUG(dev, QEDR_MSG_MR,
2500 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2501 		 max_page_list_len);
2502 
2503 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2504 	if (!mr)
2505 		return ERR_PTR(rc);
2506 
2507 	mr->dev = dev;
2508 	mr->type = QEDR_MR_FRMR;
2509 
2510 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2511 	if (rc)
2512 		goto err0;
2513 
2514 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2515 	if (rc) {
2516 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2517 		goto err0;
2518 	}
2519 
2520 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2521 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2522 	mr->hw_mr.key = 0;
2523 	mr->hw_mr.pd = pd->pd_id;
2524 	mr->hw_mr.local_read = 1;
2525 	mr->hw_mr.local_write = 0;
2526 	mr->hw_mr.remote_read = 0;
2527 	mr->hw_mr.remote_write = 0;
2528 	mr->hw_mr.remote_atomic = 0;
2529 	mr->hw_mr.mw_bind = false;
2530 	mr->hw_mr.pbl_ptr = 0;
2531 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2532 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2533 	mr->hw_mr.fbo = 0;
2534 	mr->hw_mr.length = 0;
2535 	mr->hw_mr.vaddr = 0;
2536 	mr->hw_mr.zbva = false;
2537 	mr->hw_mr.phy_mr = true;
2538 	mr->hw_mr.dma_mr = false;
2539 
2540 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2541 	if (rc) {
2542 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2543 		goto err1;
2544 	}
2545 
2546 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2547 	mr->ibmr.rkey = mr->ibmr.lkey;
2548 
2549 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2550 	return mr;
2551 
2552 err1:
2553 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2554 err0:
2555 	kfree(mr);
2556 	return ERR_PTR(rc);
2557 }
2558 
2559 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2560 			    enum ib_mr_type mr_type, u32 max_num_sg)
2561 {
2562 	struct qedr_mr *mr;
2563 
2564 	if (mr_type != IB_MR_TYPE_MEM_REG)
2565 		return ERR_PTR(-EINVAL);
2566 
2567 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2568 
2569 	if (IS_ERR(mr))
2570 		return ERR_PTR(-EINVAL);
2571 
2572 	return &mr->ibmr;
2573 }
2574 
2575 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2576 {
2577 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2578 	struct qedr_pbl *pbl_table;
2579 	struct regpair *pbe;
2580 	u32 pbes_in_page;
2581 
2582 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2583 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2584 		return -ENOMEM;
2585 	}
2586 
2587 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2588 		 mr->npages, addr);
2589 
2590 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2591 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2592 	pbe = (struct regpair *)pbl_table->va;
2593 	pbe +=  mr->npages % pbes_in_page;
2594 	pbe->lo = cpu_to_le32((u32)addr);
2595 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2596 
2597 	mr->npages++;
2598 
2599 	return 0;
2600 }
2601 
2602 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2603 {
2604 	int work = info->completed - info->completed_handled - 1;
2605 
2606 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2607 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2608 		struct qedr_pbl *pbl;
2609 
2610 		/* Free all the page list that are possible to be freed
2611 		 * (all the ones that were invalidated), under the assumption
2612 		 * that if an FMR was completed successfully that means that
2613 		 * if there was an invalidate operation before it also ended
2614 		 */
2615 		pbl = list_first_entry(&info->inuse_pbl_list,
2616 				       struct qedr_pbl, list_entry);
2617 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2618 		info->completed_handled++;
2619 	}
2620 }
2621 
2622 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2623 		   int sg_nents, unsigned int *sg_offset)
2624 {
2625 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2626 
2627 	mr->npages = 0;
2628 
2629 	handle_completed_mrs(mr->dev, &mr->info);
2630 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2631 }
2632 
2633 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2634 {
2635 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2636 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2637 	struct qedr_mr *mr;
2638 	int rc;
2639 
2640 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2641 	if (!mr)
2642 		return ERR_PTR(-ENOMEM);
2643 
2644 	mr->type = QEDR_MR_DMA;
2645 
2646 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2647 	if (rc) {
2648 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2649 		goto err1;
2650 	}
2651 
2652 	/* index only, 18 bit long, lkey = itid << 8 | key */
2653 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2654 	mr->hw_mr.pd = pd->pd_id;
2655 	mr->hw_mr.local_read = 1;
2656 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2657 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2658 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2659 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2660 	mr->hw_mr.dma_mr = true;
2661 
2662 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2663 	if (rc) {
2664 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2665 		goto err2;
2666 	}
2667 
2668 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2669 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2670 	    mr->hw_mr.remote_atomic)
2671 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2672 
2673 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2674 	return &mr->ibmr;
2675 
2676 err2:
2677 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2678 err1:
2679 	kfree(mr);
2680 	return ERR_PTR(rc);
2681 }
2682 
2683 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2684 {
2685 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2686 }
2687 
2688 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2689 {
2690 	int i, len = 0;
2691 
2692 	for (i = 0; i < num_sge; i++)
2693 		len += sg_list[i].length;
2694 
2695 	return len;
2696 }
2697 
2698 static void swap_wqe_data64(u64 *p)
2699 {
2700 	int i;
2701 
2702 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2703 		*p = cpu_to_be64(cpu_to_le64(*p));
2704 }
2705 
2706 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2707 				       struct qedr_qp *qp, u8 *wqe_size,
2708 				       struct ib_send_wr *wr,
2709 				       struct ib_send_wr **bad_wr, u8 *bits,
2710 				       u8 bit)
2711 {
2712 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2713 	char *seg_prt, *wqe;
2714 	int i, seg_siz;
2715 
2716 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2717 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2718 		*bad_wr = wr;
2719 		return 0;
2720 	}
2721 
2722 	if (!data_size)
2723 		return data_size;
2724 
2725 	*bits |= bit;
2726 
2727 	seg_prt = NULL;
2728 	wqe = NULL;
2729 	seg_siz = 0;
2730 
2731 	/* Copy data inline */
2732 	for (i = 0; i < wr->num_sge; i++) {
2733 		u32 len = wr->sg_list[i].length;
2734 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2735 
2736 		while (len > 0) {
2737 			u32 cur;
2738 
2739 			/* New segment required */
2740 			if (!seg_siz) {
2741 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2742 				seg_prt = wqe;
2743 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2744 				(*wqe_size)++;
2745 			}
2746 
2747 			/* Calculate currently allowed length */
2748 			cur = min_t(u32, len, seg_siz);
2749 			memcpy(seg_prt, src, cur);
2750 
2751 			/* Update segment variables */
2752 			seg_prt += cur;
2753 			seg_siz -= cur;
2754 
2755 			/* Update sge variables */
2756 			src += cur;
2757 			len -= cur;
2758 
2759 			/* Swap fully-completed segments */
2760 			if (!seg_siz)
2761 				swap_wqe_data64((u64 *)wqe);
2762 		}
2763 	}
2764 
2765 	/* swap last not completed segment */
2766 	if (seg_siz)
2767 		swap_wqe_data64((u64 *)wqe);
2768 
2769 	return data_size;
2770 }
2771 
/* Fill a receive-queue SGE: little-endian address pair, length and flags.
 * Every macro argument is parenthesized so that an expression can be
 * passed as @sge.
 */
#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->flags = cpu_to_le32(vflags);		\
	} while (0)

/* Fill an SRQ WQE header: little-endian wr_id pair and the SGE count. */
#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
	do {							\
		DMA_REGPAIR_LE((hdr)->wr_id, vwr_id);		\
		(hdr)->num_sges = (num_sge);			\
	} while (0)

/* Fill an SRQ SGE: little-endian address pair, length and local key. */
#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->l_key = cpu_to_le32(vlkey);		\
	} while (0)
2791 
2792 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2793 				struct ib_send_wr *wr)
2794 {
2795 	u32 data_size = 0;
2796 	int i;
2797 
2798 	for (i = 0; i < wr->num_sge; i++) {
2799 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2800 
2801 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2802 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2803 		sge->length = cpu_to_le32(wr->sg_list[i].length);
2804 		data_size += wr->sg_list[i].length;
2805 	}
2806 
2807 	if (wqe_size)
2808 		*wqe_size += wr->num_sge;
2809 
2810 	return data_size;
2811 }
2812 
2813 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2814 				     struct qedr_qp *qp,
2815 				     struct rdma_sq_rdma_wqe_1st *rwqe,
2816 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2817 				     struct ib_send_wr *wr,
2818 				     struct ib_send_wr **bad_wr)
2819 {
2820 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2821 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2822 
2823 	if (wr->send_flags & IB_SEND_INLINE &&
2824 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2825 	     wr->opcode == IB_WR_RDMA_WRITE)) {
2826 		u8 flags = 0;
2827 
2828 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2829 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2830 						   bad_wr, &rwqe->flags, flags);
2831 	}
2832 
2833 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2834 }
2835 
2836 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2837 				     struct qedr_qp *qp,
2838 				     struct rdma_sq_send_wqe_1st *swqe,
2839 				     struct rdma_sq_send_wqe_2st *swqe2,
2840 				     struct ib_send_wr *wr,
2841 				     struct ib_send_wr **bad_wr)
2842 {
2843 	memset(swqe2, 0, sizeof(*swqe2));
2844 	if (wr->send_flags & IB_SEND_INLINE) {
2845 		u8 flags = 0;
2846 
2847 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2848 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2849 						   bad_wr, &swqe->flags, flags);
2850 	}
2851 
2852 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2853 }
2854 
2855 static int qedr_prepare_reg(struct qedr_qp *qp,
2856 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2857 			    struct ib_reg_wr *wr)
2858 {
2859 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2860 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2861 
2862 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2863 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2864 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2865 	fwqe1->l_key = wr->key;
2866 
2867 	fwqe2->access_ctrl = 0;
2868 
2869 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2870 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2871 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2872 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2873 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2874 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2875 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2876 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2877 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2878 	fwqe2->fmr_ctrl = 0;
2879 
2880 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2881 		   ilog2(mr->ibmr.page_size) - 12);
2882 
2883 	fwqe2->length_hi = 0;
2884 	fwqe2->length_lo = mr->ibmr.length;
2885 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2886 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2887 
2888 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2889 
2890 	return 0;
2891 }
2892 
2893 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2894 {
2895 	switch (opcode) {
2896 	case IB_WR_RDMA_WRITE:
2897 	case IB_WR_RDMA_WRITE_WITH_IMM:
2898 		return IB_WC_RDMA_WRITE;
2899 	case IB_WR_SEND_WITH_IMM:
2900 	case IB_WR_SEND:
2901 	case IB_WR_SEND_WITH_INV:
2902 		return IB_WC_SEND;
2903 	case IB_WR_RDMA_READ:
2904 	case IB_WR_RDMA_READ_WITH_INV:
2905 		return IB_WC_RDMA_READ;
2906 	case IB_WR_ATOMIC_CMP_AND_SWP:
2907 		return IB_WC_COMP_SWAP;
2908 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2909 		return IB_WC_FETCH_ADD;
2910 	case IB_WR_REG_MR:
2911 		return IB_WC_REG_MR;
2912 	case IB_WR_LOCAL_INV:
2913 		return IB_WC_LOCAL_INV;
2914 	default:
2915 		return IB_WC_SEND;
2916 	}
2917 }
2918 
2919 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2920 {
2921 	int wq_is_full, err_wr, pbl_is_full;
2922 	struct qedr_dev *dev = qp->dev;
2923 
2924 	/* prevent SQ overflow and/or processing of a bad WR */
2925 	err_wr = wr->num_sge > qp->sq.max_sges;
2926 	wq_is_full = qedr_wq_is_full(&qp->sq);
2927 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2928 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2929 	if (wq_is_full || err_wr || pbl_is_full) {
2930 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2931 			DP_ERR(dev,
2932 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2933 			       qp);
2934 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2935 		}
2936 
2937 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2938 			DP_ERR(dev,
2939 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2940 			       qp);
2941 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2942 		}
2943 
2944 		if (pbl_is_full &&
2945 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2946 			DP_ERR(dev,
2947 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2948 			       qp);
2949 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2950 		}
2951 		return false;
2952 	}
2953 	return true;
2954 }
2955 
2956 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2957 		     struct ib_send_wr **bad_wr)
2958 {
2959 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2960 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2961 	struct rdma_sq_atomic_wqe_1st *awqe1;
2962 	struct rdma_sq_atomic_wqe_2nd *awqe2;
2963 	struct rdma_sq_atomic_wqe_3rd *awqe3;
2964 	struct rdma_sq_send_wqe_2st *swqe2;
2965 	struct rdma_sq_local_inv_wqe *iwqe;
2966 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2967 	struct rdma_sq_send_wqe_1st *swqe;
2968 	struct rdma_sq_rdma_wqe_1st *rwqe;
2969 	struct rdma_sq_fmr_wqe_1st *fwqe1;
2970 	struct rdma_sq_common_wqe *wqe;
2971 	u32 length;
2972 	int rc = 0;
2973 	bool comp;
2974 
2975 	if (!qedr_can_post_send(qp, wr)) {
2976 		*bad_wr = wr;
2977 		return -ENOMEM;
2978 	}
2979 
2980 	wqe = qed_chain_produce(&qp->sq.pbl);
2981 	qp->wqe_wr_id[qp->sq.prod].signaled =
2982 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2983 
2984 	wqe->flags = 0;
2985 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2986 		   !!(wr->send_flags & IB_SEND_SOLICITED));
2987 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2988 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2989 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2990 		   !!(wr->send_flags & IB_SEND_FENCE));
2991 	wqe->prev_wqe_size = qp->prev_wqe_size;
2992 
2993 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2994 
2995 	switch (wr->opcode) {
2996 	case IB_WR_SEND_WITH_IMM:
2997 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
2998 			rc = -EINVAL;
2999 			*bad_wr = wr;
3000 			break;
3001 		}
3002 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3003 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3004 		swqe->wqe_size = 2;
3005 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3006 
3007 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3008 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3009 						   wr, bad_wr);
3010 		swqe->length = cpu_to_le32(length);
3011 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3012 		qp->prev_wqe_size = swqe->wqe_size;
3013 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3014 		break;
3015 	case IB_WR_SEND:
3016 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3017 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3018 
3019 		swqe->wqe_size = 2;
3020 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3021 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3022 						   wr, bad_wr);
3023 		swqe->length = cpu_to_le32(length);
3024 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3025 		qp->prev_wqe_size = swqe->wqe_size;
3026 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3027 		break;
3028 	case IB_WR_SEND_WITH_INV:
3029 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3030 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3031 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3032 		swqe->wqe_size = 2;
3033 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3034 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3035 						   wr, bad_wr);
3036 		swqe->length = cpu_to_le32(length);
3037 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3038 		qp->prev_wqe_size = swqe->wqe_size;
3039 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3040 		break;
3041 
3042 	case IB_WR_RDMA_WRITE_WITH_IMM:
3043 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3044 			rc = -EINVAL;
3045 			*bad_wr = wr;
3046 			break;
3047 		}
3048 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3049 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3050 
3051 		rwqe->wqe_size = 2;
3052 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3053 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3054 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3055 						   wr, bad_wr);
3056 		rwqe->length = cpu_to_le32(length);
3057 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3058 		qp->prev_wqe_size = rwqe->wqe_size;
3059 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3060 		break;
3061 	case IB_WR_RDMA_WRITE:
3062 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3063 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3064 
3065 		rwqe->wqe_size = 2;
3066 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3067 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3068 						   wr, bad_wr);
3069 		rwqe->length = cpu_to_le32(length);
3070 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3071 		qp->prev_wqe_size = rwqe->wqe_size;
3072 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3073 		break;
3074 	case IB_WR_RDMA_READ_WITH_INV:
3075 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3076 		/* fallthrough -- same is identical to RDMA READ */
3077 
3078 	case IB_WR_RDMA_READ:
3079 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3080 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3081 
3082 		rwqe->wqe_size = 2;
3083 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3084 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3085 						   wr, bad_wr);
3086 		rwqe->length = cpu_to_le32(length);
3087 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3088 		qp->prev_wqe_size = rwqe->wqe_size;
3089 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3090 		break;
3091 
3092 	case IB_WR_ATOMIC_CMP_AND_SWP:
3093 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3094 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3095 		awqe1->wqe_size = 4;
3096 
3097 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3098 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3099 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3100 
3101 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3102 
3103 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3104 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3105 			DMA_REGPAIR_LE(awqe3->swap_data,
3106 				       atomic_wr(wr)->compare_add);
3107 		} else {
3108 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3109 			DMA_REGPAIR_LE(awqe3->swap_data,
3110 				       atomic_wr(wr)->swap);
3111 			DMA_REGPAIR_LE(awqe3->cmp_data,
3112 				       atomic_wr(wr)->compare_add);
3113 		}
3114 
3115 		qedr_prepare_sq_sges(qp, NULL, wr);
3116 
3117 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3118 		qp->prev_wqe_size = awqe1->wqe_size;
3119 		break;
3120 
3121 	case IB_WR_LOCAL_INV:
3122 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3123 		iwqe->wqe_size = 1;
3124 
3125 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3126 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3127 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3128 		qp->prev_wqe_size = iwqe->wqe_size;
3129 		break;
3130 	case IB_WR_REG_MR:
3131 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3132 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3133 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3134 		fwqe1->wqe_size = 2;
3135 
3136 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3137 		if (rc) {
3138 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3139 			*bad_wr = wr;
3140 			break;
3141 		}
3142 
3143 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3144 		qp->prev_wqe_size = fwqe1->wqe_size;
3145 		break;
3146 	default:
3147 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3148 		rc = -EINVAL;
3149 		*bad_wr = wr;
3150 		break;
3151 	}
3152 
3153 	if (*bad_wr) {
3154 		u16 value;
3155 
3156 		/* Restore prod to its position before
3157 		 * this WR was processed
3158 		 */
3159 		value = le16_to_cpu(qp->sq.db_data.data.value);
3160 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3161 
3162 		/* Restore prev_wqe_size */
3163 		qp->prev_wqe_size = wqe->prev_wqe_size;
3164 		rc = -EINVAL;
3165 		DP_ERR(dev, "POST SEND FAILED\n");
3166 	}
3167 
3168 	return rc;
3169 }
3170 
3171 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3172 		   struct ib_send_wr **bad_wr)
3173 {
3174 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3175 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3176 	unsigned long flags;
3177 	int rc = 0;
3178 
3179 	*bad_wr = NULL;
3180 
3181 	if (qp->qp_type == IB_QPT_GSI)
3182 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3183 
3184 	spin_lock_irqsave(&qp->q_lock, flags);
3185 
3186 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3187 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3188 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3189 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3190 			spin_unlock_irqrestore(&qp->q_lock, flags);
3191 			*bad_wr = wr;
3192 			DP_DEBUG(dev, QEDR_MSG_CQ,
3193 				 "QP in wrong state! QP icid=0x%x state %d\n",
3194 				 qp->icid, qp->state);
3195 			return -EINVAL;
3196 		}
3197 	}
3198 
3199 	while (wr) {
3200 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3201 		if (rc)
3202 			break;
3203 
3204 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3205 
3206 		qedr_inc_sw_prod(&qp->sq);
3207 
3208 		qp->sq.db_data.data.value++;
3209 
3210 		wr = wr->next;
3211 	}
3212 
3213 	/* Trigger doorbell
3214 	 * If there was a failure in the first WR then it will be triggered in
3215 	 * vane. However this is not harmful (as long as the producer value is
3216 	 * unchanged). For performance reasons we avoid checking for this
3217 	 * redundant doorbell.
3218 	 *
3219 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3220 	 * soon as we give the doorbell, we could get a completion
3221 	 * for this wr, therefore we need to make sure that the
3222 	 * memory is updated before giving the doorbell.
3223 	 * During qedr_poll_cq, rmb is called before accessing the
3224 	 * cqe. This covers for the smp_rmb as well.
3225 	 */
3226 	smp_wmb();
3227 	writel(qp->sq.db_data.raw, qp->sq.db);
3228 
3229 	/* Make sure write sticks */
3230 	mmiowb();
3231 
3232 	spin_unlock_irqrestore(&qp->q_lock, flags);
3233 
3234 	return rc;
3235 }
3236 
3237 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3238 		   struct ib_recv_wr **bad_wr)
3239 {
3240 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3241 	struct qedr_dev *dev = qp->dev;
3242 	unsigned long flags;
3243 	int status = 0;
3244 
3245 	if (qp->qp_type == IB_QPT_GSI)
3246 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3247 
3248 	spin_lock_irqsave(&qp->q_lock, flags);
3249 
3250 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3251 		spin_unlock_irqrestore(&qp->q_lock, flags);
3252 		*bad_wr = wr;
3253 		return -EINVAL;
3254 	}
3255 
3256 	while (wr) {
3257 		int i;
3258 
3259 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3260 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3261 		    wr->num_sge > qp->rq.max_sges) {
3262 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3263 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3264 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3265 			       qp->rq.max_sges);
3266 			status = -ENOMEM;
3267 			*bad_wr = wr;
3268 			break;
3269 		}
3270 		for (i = 0; i < wr->num_sge; i++) {
3271 			u32 flags = 0;
3272 			struct rdma_rq_sge *rqe =
3273 			    qed_chain_produce(&qp->rq.pbl);
3274 
3275 			/* First one must include the number
3276 			 * of SGE in the list
3277 			 */
3278 			if (!i)
3279 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3280 					  wr->num_sge);
3281 
3282 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3283 				  wr->sg_list[i].lkey);
3284 
3285 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3286 				   wr->sg_list[i].length, flags);
3287 		}
3288 
3289 		/* Special case of no sges. FW requires between 1-4 sges...
3290 		 * in this case we need to post 1 sge with length zero. this is
3291 		 * because rdma write with immediate consumes an RQ.
3292 		 */
3293 		if (!wr->num_sge) {
3294 			u32 flags = 0;
3295 			struct rdma_rq_sge *rqe =
3296 			    qed_chain_produce(&qp->rq.pbl);
3297 
3298 			/* First one must include the number
3299 			 * of SGE in the list
3300 			 */
3301 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3302 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3303 
3304 			RQ_SGE_SET(rqe, 0, 0, flags);
3305 			i = 1;
3306 		}
3307 
3308 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3309 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3310 
3311 		qedr_inc_sw_prod(&qp->rq);
3312 
3313 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3314 		 * soon as we give the doorbell, we could get a completion
3315 		 * for this wr, therefore we need to make sure that the
3316 		 * memory is update before giving the doorbell.
3317 		 * During qedr_poll_cq, rmb is called before accessing the
3318 		 * cqe. This covers for the smp_rmb as well.
3319 		 */
3320 		smp_wmb();
3321 
3322 		qp->rq.db_data.data.value++;
3323 
3324 		writel(qp->rq.db_data.raw, qp->rq.db);
3325 
3326 		/* Make sure write sticks */
3327 		mmiowb();
3328 
3329 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3330 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3331 			mmiowb();	/* for second doorbell */
3332 		}
3333 
3334 		wr = wr->next;
3335 	}
3336 
3337 	spin_unlock_irqrestore(&qp->q_lock, flags);
3338 
3339 	return status;
3340 }
3341 
3342 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3343 {
3344 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3345 
3346 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3347 		cq->pbl_toggle;
3348 }
3349 
3350 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3351 {
3352 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3353 	struct qedr_qp *qp;
3354 
3355 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3356 						   resp_cqe->qp_handle.lo,
3357 						   u64);
3358 	return qp;
3359 }
3360 
3361 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3362 {
3363 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3364 
3365 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3366 }
3367 
3368 /* Return latest CQE (needs processing) */
3369 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3370 {
3371 	return cq->latest_cqe;
3372 }
3373 
3374 /* In fmr we need to increase the number of fmr completed counter for the fmr
3375  * algorithm determining whether we can free a pbl or not.
3376  * we need to perform this whether the work request was signaled or not. for
3377  * this purpose we call this function from the condition that checks if a wr
3378  * should be skipped, to make sure we don't miss it ( possibly this fmr
3379  * operation was not signalted)
3380  */
3381 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3382 {
3383 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3384 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3385 }
3386 
3387 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3388 		       struct qedr_cq *cq, int num_entries,
3389 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3390 		       int force)
3391 {
3392 	u16 cnt = 0;
3393 
3394 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3395 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3396 			qedr_chk_if_fmr(qp);
3397 			/* skip WC */
3398 			goto next_cqe;
3399 		}
3400 
3401 		/* fill WC */
3402 		wc->status = status;
3403 		wc->vendor_err = 0;
3404 		wc->wc_flags = 0;
3405 		wc->src_qp = qp->id;
3406 		wc->qp = &qp->ibqp;
3407 
3408 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3409 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3410 
3411 		switch (wc->opcode) {
3412 		case IB_WC_RDMA_WRITE:
3413 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3414 			break;
3415 		case IB_WC_COMP_SWAP:
3416 		case IB_WC_FETCH_ADD:
3417 			wc->byte_len = 8;
3418 			break;
3419 		case IB_WC_REG_MR:
3420 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3421 			break;
3422 		case IB_WC_RDMA_READ:
3423 		case IB_WC_SEND:
3424 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3425 			break;
3426 		default:
3427 			break;
3428 		}
3429 
3430 		num_entries--;
3431 		wc++;
3432 		cnt++;
3433 next_cqe:
3434 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3435 			qed_chain_consume(&qp->sq.pbl);
3436 		qedr_inc_sw_cons(&qp->sq);
3437 	}
3438 
3439 	return cnt;
3440 }
3441 
3442 static int qedr_poll_cq_req(struct qedr_dev *dev,
3443 			    struct qedr_qp *qp, struct qedr_cq *cq,
3444 			    int num_entries, struct ib_wc *wc,
3445 			    struct rdma_cqe_requester *req)
3446 {
3447 	int cnt = 0;
3448 
3449 	switch (req->status) {
3450 	case RDMA_CQE_REQ_STS_OK:
3451 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3452 				  IB_WC_SUCCESS, 0);
3453 		break;
3454 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3455 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3456 			DP_DEBUG(dev, QEDR_MSG_CQ,
3457 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3458 				 cq->icid, qp->icid);
3459 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3460 				  IB_WC_WR_FLUSH_ERR, 1);
3461 		break;
3462 	default:
3463 		/* process all WQE before the cosumer */
3464 		qp->state = QED_ROCE_QP_STATE_ERR;
3465 		cnt = process_req(dev, qp, cq, num_entries, wc,
3466 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3467 		wc += cnt;
3468 		/* if we have extra WC fill it with actual error info */
3469 		if (cnt < num_entries) {
3470 			enum ib_wc_status wc_status;
3471 
3472 			switch (req->status) {
3473 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3474 				DP_ERR(dev,
3475 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3476 				       cq->icid, qp->icid);
3477 				wc_status = IB_WC_BAD_RESP_ERR;
3478 				break;
3479 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3480 				DP_ERR(dev,
3481 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3482 				       cq->icid, qp->icid);
3483 				wc_status = IB_WC_LOC_LEN_ERR;
3484 				break;
3485 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3486 				DP_ERR(dev,
3487 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3488 				       cq->icid, qp->icid);
3489 				wc_status = IB_WC_LOC_QP_OP_ERR;
3490 				break;
3491 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3492 				DP_ERR(dev,
3493 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3494 				       cq->icid, qp->icid);
3495 				wc_status = IB_WC_LOC_PROT_ERR;
3496 				break;
3497 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3498 				DP_ERR(dev,
3499 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3500 				       cq->icid, qp->icid);
3501 				wc_status = IB_WC_MW_BIND_ERR;
3502 				break;
3503 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3504 				DP_ERR(dev,
3505 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3506 				       cq->icid, qp->icid);
3507 				wc_status = IB_WC_REM_INV_REQ_ERR;
3508 				break;
3509 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3510 				DP_ERR(dev,
3511 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3512 				       cq->icid, qp->icid);
3513 				wc_status = IB_WC_REM_ACCESS_ERR;
3514 				break;
3515 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3516 				DP_ERR(dev,
3517 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3518 				       cq->icid, qp->icid);
3519 				wc_status = IB_WC_REM_OP_ERR;
3520 				break;
3521 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3522 				DP_ERR(dev,
3523 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3524 				       cq->icid, qp->icid);
3525 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3526 				break;
3527 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3528 				DP_ERR(dev,
3529 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3530 				       cq->icid, qp->icid);
3531 				wc_status = IB_WC_RETRY_EXC_ERR;
3532 				break;
3533 			default:
3534 				DP_ERR(dev,
3535 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3536 				       cq->icid, qp->icid);
3537 				wc_status = IB_WC_GENERAL_ERR;
3538 			}
3539 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3540 					   wc_status, 1);
3541 		}
3542 	}
3543 
3544 	return cnt;
3545 }
3546 
3547 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3548 {
3549 	switch (status) {
3550 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3551 		return IB_WC_LOC_ACCESS_ERR;
3552 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3553 		return IB_WC_LOC_LEN_ERR;
3554 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3555 		return IB_WC_LOC_QP_OP_ERR;
3556 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3557 		return IB_WC_LOC_PROT_ERR;
3558 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3559 		return IB_WC_MW_BIND_ERR;
3560 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3561 		return IB_WC_REM_INV_RD_REQ_ERR;
3562 	case RDMA_CQE_RESP_STS_OK:
3563 		return IB_WC_SUCCESS;
3564 	default:
3565 		return IB_WC_GENERAL_ERR;
3566 	}
3567 }
3568 
3569 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3570 					  struct ib_wc *wc)
3571 {
3572 	wc->status = IB_WC_SUCCESS;
3573 	wc->byte_len = le32_to_cpu(resp->length);
3574 
3575 	if (resp->flags & QEDR_RESP_IMM) {
3576 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3577 		wc->wc_flags |= IB_WC_WITH_IMM;
3578 
3579 		if (resp->flags & QEDR_RESP_RDMA)
3580 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3581 
3582 		if (resp->flags & QEDR_RESP_INV)
3583 			return -EINVAL;
3584 
3585 	} else if (resp->flags & QEDR_RESP_INV) {
3586 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3587 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3588 
3589 		if (resp->flags & QEDR_RESP_RDMA)
3590 			return -EINVAL;
3591 
3592 	} else if (resp->flags & QEDR_RESP_RDMA) {
3593 		return -EINVAL;
3594 	}
3595 
3596 	return 0;
3597 }
3598 
3599 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3600 			       struct qedr_cq *cq, struct ib_wc *wc,
3601 			       struct rdma_cqe_responder *resp, u64 wr_id)
3602 {
3603 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3604 	wc->opcode = IB_WC_RECV;
3605 	wc->wc_flags = 0;
3606 
3607 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3608 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3609 			DP_ERR(dev,
3610 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3611 			       cq, cq->icid, resp->flags);
3612 
3613 	} else {
3614 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3615 		if (wc->status == IB_WC_GENERAL_ERR)
3616 			DP_ERR(dev,
3617 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3618 			       cq, cq->icid, resp->status);
3619 	}
3620 
3621 	/* Fill the rest of the WC */
3622 	wc->vendor_err = 0;
3623 	wc->src_qp = qp->id;
3624 	wc->qp = &qp->ibqp;
3625 	wc->wr_id = wr_id;
3626 }
3627 
3628 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3629 			    struct qedr_cq *cq, struct ib_wc *wc,
3630 			    struct rdma_cqe_responder *resp)
3631 {
3632 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3633 
3634 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3635 
3636 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3637 		qed_chain_consume(&qp->rq.pbl);
3638 	qedr_inc_sw_cons(&qp->rq);
3639 
3640 	return 1;
3641 }
3642 
3643 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3644 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3645 {
3646 	u16 cnt = 0;
3647 
3648 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3649 		/* fill WC */
3650 		wc->status = IB_WC_WR_FLUSH_ERR;
3651 		wc->vendor_err = 0;
3652 		wc->wc_flags = 0;
3653 		wc->src_qp = qp->id;
3654 		wc->byte_len = 0;
3655 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3656 		wc->qp = &qp->ibqp;
3657 		num_entries--;
3658 		wc++;
3659 		cnt++;
3660 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3661 			qed_chain_consume(&qp->rq.pbl);
3662 		qedr_inc_sw_cons(&qp->rq);
3663 	}
3664 
3665 	return cnt;
3666 }
3667 
3668 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3669 				 struct rdma_cqe_responder *resp, int *update)
3670 {
3671 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
3672 		consume_cqe(cq);
3673 		*update |= 1;
3674 	}
3675 }
3676 
3677 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3678 			     struct qedr_cq *cq, int num_entries,
3679 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
3680 			     int *update)
3681 {
3682 	int cnt;
3683 
3684 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3685 		cnt = process_resp_flush(qp, cq, num_entries, wc,
3686 					 resp->rq_cons_or_srq_id);
3687 		try_consume_resp_cqe(cq, qp, resp, update);
3688 	} else {
3689 		cnt = process_resp_one(dev, qp, cq, wc, resp);
3690 		consume_cqe(cq);
3691 		*update |= 1;
3692 	}
3693 
3694 	return cnt;
3695 }
3696 
3697 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3698 				struct rdma_cqe_requester *req, int *update)
3699 {
3700 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3701 		consume_cqe(cq);
3702 		*update |= 1;
3703 	}
3704 }
3705 
3706 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3707 {
3708 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3709 	struct qedr_cq *cq = get_qedr_cq(ibcq);
3710 	union rdma_cqe *cqe;
3711 	u32 old_cons, new_cons;
3712 	unsigned long flags;
3713 	int update = 0;
3714 	int done = 0;
3715 
3716 	if (cq->destroyed) {
3717 		DP_ERR(dev,
3718 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3719 		       cq, cq->icid);
3720 		return 0;
3721 	}
3722 
3723 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3724 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3725 
3726 	spin_lock_irqsave(&cq->cq_lock, flags);
3727 	cqe = cq->latest_cqe;
3728 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3729 	while (num_entries && is_valid_cqe(cq, cqe)) {
3730 		struct qedr_qp *qp;
3731 		int cnt = 0;
3732 
3733 		/* prevent speculative reads of any field of CQE */
3734 		rmb();
3735 
3736 		qp = cqe_get_qp(cqe);
3737 		if (!qp) {
3738 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3739 			break;
3740 		}
3741 
3742 		wc->qp = &qp->ibqp;
3743 
3744 		switch (cqe_get_type(cqe)) {
3745 		case RDMA_CQE_TYPE_REQUESTER:
3746 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3747 					       &cqe->req);
3748 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
3749 			break;
3750 		case RDMA_CQE_TYPE_RESPONDER_RQ:
3751 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3752 						&cqe->resp, &update);
3753 			break;
3754 		case RDMA_CQE_TYPE_INVALID:
3755 		default:
3756 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
3757 			       cqe_get_type(cqe));
3758 		}
3759 		num_entries -= cnt;
3760 		wc += cnt;
3761 		done += cnt;
3762 
3763 		cqe = get_cqe(cq);
3764 	}
3765 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3766 
3767 	cq->cq_cons += new_cons - old_cons;
3768 
3769 	if (update)
3770 		/* doorbell notifies abount latest VALID entry,
3771 		 * but chain already point to the next INVALID one
3772 		 */
3773 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3774 
3775 	spin_unlock_irqrestore(&cq->cq_lock, flags);
3776 	return done;
3777 }
3778 
3779 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3780 		     u8 port_num,
3781 		     const struct ib_wc *in_wc,
3782 		     const struct ib_grh *in_grh,
3783 		     const struct ib_mad_hdr *mad_hdr,
3784 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
3785 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
3786 {
3787 	struct qedr_dev *dev = get_qedr_dev(ibdev);
3788 
3789 	DP_DEBUG(dev, QEDR_MSG_GSI,
3790 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3791 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3792 		 mad_hdr->class_specific, mad_hdr->class_version,
3793 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3794 	return IB_MAD_RESULT_SUCCESS;
3795 }
3796