xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 8fdf9062)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_roce_cm.h"
53 
54 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
55 #define	RDMA_MAX_SGE_PER_SRQ	(4)
56 #define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
57 
58 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
59 
60 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
61 					size_t len)
62 {
63 	size_t min_len = min_t(size_t, len, udata->outlen);
64 
65 	return ib_copy_to_udata(udata, src, min_len);
66 }
67 
68 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
69 {
70 	if (index > QEDR_ROCE_PKEY_TABLE_LEN)
71 		return -EINVAL;
72 
73 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
74 	return 0;
75 }
76 
77 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
78 		      int index, union ib_gid *sgid)
79 {
80 	struct qedr_dev *dev = get_qedr_dev(ibdev);
81 
82 	memset(sgid->raw, 0, sizeof(sgid->raw));
83 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
84 
85 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
86 		 sgid->global.interface_id, sgid->global.subnet_prefix);
87 
88 	return 0;
89 }
90 
91 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
92 {
93 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
94 	struct qedr_device_attr *qattr = &dev->attr;
95 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
96 
97 	srq_attr->srq_limit = srq->srq_limit;
98 	srq_attr->max_wr = qattr->max_srq_wr;
99 	srq_attr->max_sge = qattr->max_sge;
100 
101 	return 0;
102 }
103 
104 int qedr_query_device(struct ib_device *ibdev,
105 		      struct ib_device_attr *attr, struct ib_udata *udata)
106 {
107 	struct qedr_dev *dev = get_qedr_dev(ibdev);
108 	struct qedr_device_attr *qattr = &dev->attr;
109 
110 	if (!dev->rdma_ctx) {
111 		DP_ERR(dev,
112 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
113 		       dev->rdma_ctx);
114 		return -EINVAL;
115 	}
116 
117 	memset(attr, 0, sizeof(*attr));
118 
119 	attr->fw_ver = qattr->fw_ver;
120 	attr->sys_image_guid = qattr->sys_image_guid;
121 	attr->max_mr_size = qattr->max_mr_size;
122 	attr->page_size_cap = qattr->page_size_caps;
123 	attr->vendor_id = qattr->vendor_id;
124 	attr->vendor_part_id = qattr->vendor_part_id;
125 	attr->hw_ver = qattr->hw_ver;
126 	attr->max_qp = qattr->max_qp;
127 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
128 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
129 	    IB_DEVICE_RC_RNR_NAK_GEN |
130 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
131 
132 	attr->max_send_sge = qattr->max_sge;
133 	attr->max_recv_sge = qattr->max_sge;
134 	attr->max_sge_rd = qattr->max_sge;
135 	attr->max_cq = qattr->max_cq;
136 	attr->max_cqe = qattr->max_cqe;
137 	attr->max_mr = qattr->max_mr;
138 	attr->max_mw = qattr->max_mw;
139 	attr->max_pd = qattr->max_pd;
140 	attr->atomic_cap = dev->atomic_cap;
141 	attr->max_fmr = qattr->max_fmr;
142 	attr->max_map_per_fmr = 16;
143 	attr->max_qp_init_rd_atom =
144 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
145 	attr->max_qp_rd_atom =
146 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
147 		attr->max_qp_init_rd_atom);
148 
149 	attr->max_srq = qattr->max_srq;
150 	attr->max_srq_sge = qattr->max_srq_sge;
151 	attr->max_srq_wr = qattr->max_srq_wr;
152 
153 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
154 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
155 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
156 	attr->max_ah = qattr->max_ah;
157 
158 	return 0;
159 }
160 
161 #define QEDR_SPEED_SDR		(1)
162 #define QEDR_SPEED_DDR		(2)
163 #define QEDR_SPEED_QDR		(4)
164 #define QEDR_SPEED_FDR10	(8)
165 #define QEDR_SPEED_FDR		(16)
166 #define QEDR_SPEED_EDR		(32)
167 
168 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
169 					    u8 *ib_width)
170 {
171 	switch (speed) {
172 	case 1000:
173 		*ib_speed = QEDR_SPEED_SDR;
174 		*ib_width = IB_WIDTH_1X;
175 		break;
176 	case 10000:
177 		*ib_speed = QEDR_SPEED_QDR;
178 		*ib_width = IB_WIDTH_1X;
179 		break;
180 
181 	case 20000:
182 		*ib_speed = QEDR_SPEED_DDR;
183 		*ib_width = IB_WIDTH_4X;
184 		break;
185 
186 	case 25000:
187 		*ib_speed = QEDR_SPEED_EDR;
188 		*ib_width = IB_WIDTH_1X;
189 		break;
190 
191 	case 40000:
192 		*ib_speed = QEDR_SPEED_QDR;
193 		*ib_width = IB_WIDTH_4X;
194 		break;
195 
196 	case 50000:
197 		*ib_speed = QEDR_SPEED_QDR;
198 		*ib_width = IB_WIDTH_4X;
199 		break;
200 
201 	case 100000:
202 		*ib_speed = QEDR_SPEED_EDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	default:
207 		/* Unsupported */
208 		*ib_speed = QEDR_SPEED_SDR;
209 		*ib_width = IB_WIDTH_1X;
210 	}
211 }
212 
213 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
214 {
215 	struct qedr_dev *dev;
216 	struct qed_rdma_port *rdma_port;
217 
218 	dev = get_qedr_dev(ibdev);
219 
220 	if (!dev->rdma_ctx) {
221 		DP_ERR(dev, "rdma_ctx is NULL\n");
222 		return -EINVAL;
223 	}
224 
225 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
226 
227 	/* *attr being zeroed by the caller, avoid zeroing it here */
228 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
229 		attr->state = IB_PORT_ACTIVE;
230 		attr->phys_state = 5;
231 	} else {
232 		attr->state = IB_PORT_DOWN;
233 		attr->phys_state = 3;
234 	}
235 	attr->max_mtu = IB_MTU_4096;
236 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
237 	attr->lid = 0;
238 	attr->lmc = 0;
239 	attr->sm_lid = 0;
240 	attr->sm_sl = 0;
241 	attr->ip_gids = true;
242 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
243 		attr->gid_tbl_len = 1;
244 		attr->pkey_tbl_len = 1;
245 	} else {
246 		attr->gid_tbl_len = QEDR_MAX_SGID;
247 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
248 	}
249 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
250 	attr->qkey_viol_cntr = 0;
251 	get_link_speed_and_width(rdma_port->link_speed,
252 				 &attr->active_speed, &attr->active_width);
253 	attr->max_msg_sz = rdma_port->max_msg_size;
254 	attr->max_vl_num = 4;
255 
256 	return 0;
257 }
258 
259 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
260 		     struct ib_port_modify *props)
261 {
262 	return 0;
263 }
264 
265 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
266 			 unsigned long len)
267 {
268 	struct qedr_mm *mm;
269 
270 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
271 	if (!mm)
272 		return -ENOMEM;
273 
274 	mm->key.phy_addr = phy_addr;
275 	/* This function might be called with a length which is not a multiple
276 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
277 	 * forces this granularity by increasing the requested size if needed.
278 	 * When qedr_mmap is called, it will search the list with the updated
279 	 * length as a key. To prevent search failures, the length is rounded up
280 	 * in advance to PAGE_SIZE.
281 	 */
282 	mm->key.len = roundup(len, PAGE_SIZE);
283 	INIT_LIST_HEAD(&mm->entry);
284 
285 	mutex_lock(&uctx->mm_list_lock);
286 	list_add(&mm->entry, &uctx->mm_head);
287 	mutex_unlock(&uctx->mm_list_lock);
288 
289 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
290 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
291 		 (unsigned long long)mm->key.phy_addr,
292 		 (unsigned long)mm->key.len, uctx);
293 
294 	return 0;
295 }
296 
297 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
298 			     unsigned long len)
299 {
300 	bool found = false;
301 	struct qedr_mm *mm;
302 
303 	mutex_lock(&uctx->mm_list_lock);
304 	list_for_each_entry(mm, &uctx->mm_head, entry) {
305 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
306 			continue;
307 
308 		found = true;
309 		break;
310 	}
311 	mutex_unlock(&uctx->mm_list_lock);
312 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
313 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
314 		 mm->key.phy_addr, mm->key.len, uctx, found);
315 
316 	return found;
317 }
318 
319 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
320 					struct ib_udata *udata)
321 {
322 	int rc;
323 	struct qedr_ucontext *ctx;
324 	struct qedr_alloc_ucontext_resp uresp;
325 	struct qedr_dev *dev = get_qedr_dev(ibdev);
326 	struct qed_rdma_add_user_out_params oparams;
327 
328 	if (!udata)
329 		return ERR_PTR(-EFAULT);
330 
331 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
332 	if (!ctx)
333 		return ERR_PTR(-ENOMEM);
334 
335 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
336 	if (rc) {
337 		DP_ERR(dev,
338 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
339 		       rc);
340 		goto err;
341 	}
342 
343 	ctx->dpi = oparams.dpi;
344 	ctx->dpi_addr = oparams.dpi_addr;
345 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
346 	ctx->dpi_size = oparams.dpi_size;
347 	INIT_LIST_HEAD(&ctx->mm_head);
348 	mutex_init(&ctx->mm_list_lock);
349 
350 	memset(&uresp, 0, sizeof(uresp));
351 
352 	uresp.dpm_enabled = dev->user_dpm_enabled;
353 	uresp.wids_enabled = 1;
354 	uresp.wid_count = oparams.wid_count;
355 	uresp.db_pa = ctx->dpi_phys_addr;
356 	uresp.db_size = ctx->dpi_size;
357 	uresp.max_send_wr = dev->attr.max_sqe;
358 	uresp.max_recv_wr = dev->attr.max_rqe;
359 	uresp.max_srq_wr = dev->attr.max_srq_wr;
360 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
361 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
362 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
363 	uresp.max_cqes = QEDR_MAX_CQES;
364 
365 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
366 	if (rc)
367 		goto err;
368 
369 	ctx->dev = dev;
370 
371 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
372 	if (rc)
373 		goto err;
374 
375 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
376 		 &ctx->ibucontext);
377 	return &ctx->ibucontext;
378 
379 err:
380 	kfree(ctx);
381 	return ERR_PTR(rc);
382 }
383 
384 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
385 {
386 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
387 	struct qedr_mm *mm, *tmp;
388 	int status = 0;
389 
390 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
391 		 uctx);
392 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
393 
394 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
395 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
396 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
397 			 mm->key.phy_addr, mm->key.len, uctx);
398 		list_del(&mm->entry);
399 		kfree(mm);
400 	}
401 
402 	kfree(uctx);
403 	return status;
404 }
405 
406 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
407 {
408 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
409 	struct qedr_dev *dev = get_qedr_dev(context->device);
410 	unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
411 	unsigned long len = (vma->vm_end - vma->vm_start);
412 	unsigned long dpi_start;
413 
414 	dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
415 
416 	DP_DEBUG(dev, QEDR_MSG_INIT,
417 		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
418 		 (void *)vma->vm_start, (void *)vma->vm_end,
419 		 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
420 
421 	if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
422 		DP_ERR(dev,
423 		       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
424 		       (void *)vma->vm_start, (void *)vma->vm_end);
425 		return -EINVAL;
426 	}
427 
428 	if (!qedr_search_mmap(ucontext, phys_addr, len)) {
429 		DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
430 		       vma->vm_pgoff);
431 		return -EINVAL;
432 	}
433 
434 	if (phys_addr < dpi_start ||
435 	    ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
436 		DP_ERR(dev,
437 		       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
438 		       (void *)phys_addr, (void *)dpi_start,
439 		       ucontext->dpi_size);
440 		return -EINVAL;
441 	}
442 
443 	if (vma->vm_flags & VM_READ) {
444 		DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
445 		return -EINVAL;
446 	}
447 
448 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
449 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
450 				  vma->vm_page_prot);
451 }
452 
453 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
454 			    struct ib_ucontext *context, struct ib_udata *udata)
455 {
456 	struct qedr_dev *dev = get_qedr_dev(ibdev);
457 	struct qedr_pd *pd;
458 	u16 pd_id;
459 	int rc;
460 
461 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
462 		 (udata && context) ? "User Lib" : "Kernel");
463 
464 	if (!dev->rdma_ctx) {
465 		DP_ERR(dev, "invalid RDMA context\n");
466 		return ERR_PTR(-EINVAL);
467 	}
468 
469 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
470 	if (!pd)
471 		return ERR_PTR(-ENOMEM);
472 
473 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
474 	if (rc)
475 		goto err;
476 
477 	pd->pd_id = pd_id;
478 
479 	if (udata && context) {
480 		struct qedr_alloc_pd_uresp uresp = {
481 			.pd_id = pd_id,
482 		};
483 
484 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
485 		if (rc) {
486 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
487 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
488 			goto err;
489 		}
490 
491 		pd->uctx = get_qedr_ucontext(context);
492 		pd->uctx->pd = pd;
493 	}
494 
495 	return &pd->ibpd;
496 
497 err:
498 	kfree(pd);
499 	return ERR_PTR(rc);
500 }
501 
502 int qedr_dealloc_pd(struct ib_pd *ibpd)
503 {
504 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
505 	struct qedr_pd *pd = get_qedr_pd(ibpd);
506 
507 	if (!pd) {
508 		pr_err("Invalid PD received in dealloc_pd\n");
509 		return -EINVAL;
510 	}
511 
512 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
513 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
514 
515 	kfree(pd);
516 
517 	return 0;
518 }
519 
520 static void qedr_free_pbl(struct qedr_dev *dev,
521 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
522 {
523 	struct pci_dev *pdev = dev->pdev;
524 	int i;
525 
526 	for (i = 0; i < pbl_info->num_pbls; i++) {
527 		if (!pbl[i].va)
528 			continue;
529 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
530 				  pbl[i].va, pbl[i].pa);
531 	}
532 
533 	kfree(pbl);
534 }
535 
536 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
537 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
538 
539 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
540 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
541 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
542 
543 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
544 					   struct qedr_pbl_info *pbl_info,
545 					   gfp_t flags)
546 {
547 	struct pci_dev *pdev = dev->pdev;
548 	struct qedr_pbl *pbl_table;
549 	dma_addr_t *pbl_main_tbl;
550 	dma_addr_t pa;
551 	void *va;
552 	int i;
553 
554 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
555 	if (!pbl_table)
556 		return ERR_PTR(-ENOMEM);
557 
558 	for (i = 0; i < pbl_info->num_pbls; i++) {
559 		va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
560 					 &pa, flags);
561 		if (!va)
562 			goto err;
563 
564 		pbl_table[i].va = va;
565 		pbl_table[i].pa = pa;
566 	}
567 
568 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
569 	 * the first one with physical pointers to all of the rest
570 	 */
571 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
572 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
573 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
574 
575 	return pbl_table;
576 
577 err:
578 	for (i--; i >= 0; i--)
579 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
580 				  pbl_table[i].va, pbl_table[i].pa);
581 
582 	qedr_free_pbl(dev, pbl_info, pbl_table);
583 
584 	return ERR_PTR(-ENOMEM);
585 }
586 
587 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
588 				struct qedr_pbl_info *pbl_info,
589 				u32 num_pbes, int two_layer_capable)
590 {
591 	u32 pbl_capacity;
592 	u32 pbl_size;
593 	u32 num_pbls;
594 
595 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
596 		if (num_pbes > MAX_PBES_TWO_LAYER) {
597 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
598 			       num_pbes);
599 			return -EINVAL;
600 		}
601 
602 		/* calculate required pbl page size */
603 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
604 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
605 			       NUM_PBES_ON_PAGE(pbl_size);
606 
607 		while (pbl_capacity < num_pbes) {
608 			pbl_size *= 2;
609 			pbl_capacity = pbl_size / sizeof(u64);
610 			pbl_capacity = pbl_capacity * pbl_capacity;
611 		}
612 
613 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
614 		num_pbls++;	/* One for the layer0 ( points to the pbls) */
615 		pbl_info->two_layered = true;
616 	} else {
617 		/* One layered PBL */
618 		num_pbls = 1;
619 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
620 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
621 		pbl_info->two_layered = false;
622 	}
623 
624 	pbl_info->num_pbls = num_pbls;
625 	pbl_info->pbl_size = pbl_size;
626 	pbl_info->num_pbes = num_pbes;
627 
628 	DP_DEBUG(dev, QEDR_MSG_MR,
629 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
630 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
631 
632 	return 0;
633 }
634 
635 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
636 			       struct qedr_pbl *pbl,
637 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
638 {
639 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
640 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
641 	struct qedr_pbl *pbl_tbl;
642 	struct scatterlist *sg;
643 	struct regpair *pbe;
644 	u64 pg_addr;
645 	int entry;
646 
647 	if (!pbl_info->num_pbes)
648 		return;
649 
650 	/* If we have a two layered pbl, the first pbl points to the rest
651 	 * of the pbls and the first entry lays on the second pbl in the table
652 	 */
653 	if (pbl_info->two_layered)
654 		pbl_tbl = &pbl[1];
655 	else
656 		pbl_tbl = pbl;
657 
658 	pbe = (struct regpair *)pbl_tbl->va;
659 	if (!pbe) {
660 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
661 		return;
662 	}
663 
664 	pbe_cnt = 0;
665 
666 	shift = umem->page_shift;
667 
668 	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
669 
670 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
671 		pages = sg_dma_len(sg) >> shift;
672 		pg_addr = sg_dma_address(sg);
673 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
674 			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
675 				pbe->lo = cpu_to_le32(pg_addr);
676 				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
677 
678 				pg_addr += BIT(pg_shift);
679 				pbe_cnt++;
680 				total_num_pbes++;
681 				pbe++;
682 
683 				if (total_num_pbes == pbl_info->num_pbes)
684 					return;
685 
686 				/* If the given pbl is full storing the pbes,
687 				 * move to next pbl.
688 				 */
689 				if (pbe_cnt ==
690 				    (pbl_info->pbl_size / sizeof(u64))) {
691 					pbl_tbl++;
692 					pbe = (struct regpair *)pbl_tbl->va;
693 					pbe_cnt = 0;
694 				}
695 
696 				fw_pg_cnt++;
697 			}
698 		}
699 	}
700 }
701 
702 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
703 			      struct qedr_cq *cq, struct ib_udata *udata)
704 {
705 	struct qedr_create_cq_uresp uresp;
706 	int rc;
707 
708 	memset(&uresp, 0, sizeof(uresp));
709 
710 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
711 	uresp.icid = cq->icid;
712 
713 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
714 	if (rc)
715 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
716 
717 	return rc;
718 }
719 
720 static void consume_cqe(struct qedr_cq *cq)
721 {
722 	if (cq->latest_cqe == cq->toggle_cqe)
723 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
724 
725 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
726 }
727 
728 static inline int qedr_align_cq_entries(int entries)
729 {
730 	u64 size, aligned_size;
731 
732 	/* We allocate an extra entry that we don't report to the FW. */
733 	size = (entries + 1) * QEDR_CQE_SIZE;
734 	aligned_size = ALIGN(size, PAGE_SIZE);
735 
736 	return aligned_size / QEDR_CQE_SIZE;
737 }
738 
739 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
740 				       struct qedr_dev *dev,
741 				       struct qedr_userq *q,
742 				       u64 buf_addr, size_t buf_len,
743 				       int access, int dmasync,
744 				       int alloc_and_init)
745 {
746 	u32 fw_pages;
747 	int rc;
748 
749 	q->buf_addr = buf_addr;
750 	q->buf_len = buf_len;
751 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
752 	if (IS_ERR(q->umem)) {
753 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
754 		       PTR_ERR(q->umem));
755 		return PTR_ERR(q->umem);
756 	}
757 
758 	fw_pages = ib_umem_page_count(q->umem) <<
759 	    (q->umem->page_shift - FW_PAGE_SHIFT);
760 
761 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
762 	if (rc)
763 		goto err0;
764 
765 	if (alloc_and_init) {
766 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
767 		if (IS_ERR(q->pbl_tbl)) {
768 			rc = PTR_ERR(q->pbl_tbl);
769 			goto err0;
770 		}
771 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
772 				   FW_PAGE_SHIFT);
773 	} else {
774 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
775 		if (!q->pbl_tbl) {
776 			rc = -ENOMEM;
777 			goto err0;
778 		}
779 	}
780 
781 	return 0;
782 
783 err0:
784 	ib_umem_release(q->umem);
785 	q->umem = NULL;
786 
787 	return rc;
788 }
789 
790 static inline void qedr_init_cq_params(struct qedr_cq *cq,
791 				       struct qedr_ucontext *ctx,
792 				       struct qedr_dev *dev, int vector,
793 				       int chain_entries, int page_cnt,
794 				       u64 pbl_ptr,
795 				       struct qed_rdma_create_cq_in_params
796 				       *params)
797 {
798 	memset(params, 0, sizeof(*params));
799 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
800 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
801 	params->cnq_id = vector;
802 	params->cq_size = chain_entries - 1;
803 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
804 	params->pbl_num_pages = page_cnt;
805 	params->pbl_ptr = pbl_ptr;
806 	params->pbl_two_level = 0;
807 }
808 
809 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
810 {
811 	cq->db.data.agg_flags = flags;
812 	cq->db.data.value = cpu_to_le32(cons);
813 	writeq(cq->db.raw, cq->db_addr);
814 
815 	/* Make sure write would stick */
816 	mmiowb();
817 }
818 
819 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
820 {
821 	struct qedr_cq *cq = get_qedr_cq(ibcq);
822 	unsigned long sflags;
823 	struct qedr_dev *dev;
824 
825 	dev = get_qedr_dev(ibcq->device);
826 
827 	if (cq->destroyed) {
828 		DP_ERR(dev,
829 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
830 		       cq, cq->icid);
831 		return -EINVAL;
832 	}
833 
834 
835 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
836 		return 0;
837 
838 	spin_lock_irqsave(&cq->cq_lock, sflags);
839 
840 	cq->arm_flags = 0;
841 
842 	if (flags & IB_CQ_SOLICITED)
843 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
844 
845 	if (flags & IB_CQ_NEXT_COMP)
846 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
847 
848 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
849 
850 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
851 
852 	return 0;
853 }
854 
855 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
856 			     const struct ib_cq_init_attr *attr,
857 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
858 {
859 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
860 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
861 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
862 	struct qedr_dev *dev = get_qedr_dev(ibdev);
863 	struct qed_rdma_create_cq_in_params params;
864 	struct qedr_create_cq_ureq ureq;
865 	int vector = attr->comp_vector;
866 	int entries = attr->cqe;
867 	struct qedr_cq *cq;
868 	int chain_entries;
869 	int page_cnt;
870 	u64 pbl_ptr;
871 	u16 icid;
872 	int rc;
873 
874 	DP_DEBUG(dev, QEDR_MSG_INIT,
875 		 "create_cq: called from %s. entries=%d, vector=%d\n",
876 		 udata ? "User Lib" : "Kernel", entries, vector);
877 
878 	if (entries > QEDR_MAX_CQES) {
879 		DP_ERR(dev,
880 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
881 		       entries, QEDR_MAX_CQES);
882 		return ERR_PTR(-EINVAL);
883 	}
884 
885 	chain_entries = qedr_align_cq_entries(entries);
886 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
887 
888 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
889 	if (!cq)
890 		return ERR_PTR(-ENOMEM);
891 
892 	if (udata) {
893 		memset(&ureq, 0, sizeof(ureq));
894 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
895 			DP_ERR(dev,
896 			       "create cq: problem copying data from user space\n");
897 			goto err0;
898 		}
899 
900 		if (!ureq.len) {
901 			DP_ERR(dev,
902 			       "create cq: cannot create a cq with 0 entries\n");
903 			goto err0;
904 		}
905 
906 		cq->cq_type = QEDR_CQ_TYPE_USER;
907 
908 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
909 					  ureq.len, IB_ACCESS_LOCAL_WRITE,
910 					  1, 1);
911 		if (rc)
912 			goto err0;
913 
914 		pbl_ptr = cq->q.pbl_tbl->pa;
915 		page_cnt = cq->q.pbl_info.num_pbes;
916 
917 		cq->ibcq.cqe = chain_entries;
918 	} else {
919 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
920 
921 		rc = dev->ops->common->chain_alloc(dev->cdev,
922 						   QED_CHAIN_USE_TO_CONSUME,
923 						   QED_CHAIN_MODE_PBL,
924 						   QED_CHAIN_CNT_TYPE_U32,
925 						   chain_entries,
926 						   sizeof(union rdma_cqe),
927 						   &cq->pbl, NULL);
928 		if (rc)
929 			goto err1;
930 
931 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
932 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
933 		cq->ibcq.cqe = cq->pbl.capacity;
934 	}
935 
936 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
937 			    pbl_ptr, &params);
938 
939 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
940 	if (rc)
941 		goto err2;
942 
943 	cq->icid = icid;
944 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
945 	spin_lock_init(&cq->cq_lock);
946 
947 	if (ib_ctx) {
948 		rc = qedr_copy_cq_uresp(dev, cq, udata);
949 		if (rc)
950 			goto err3;
951 	} else {
952 		/* Generate doorbell address. */
953 		cq->db_addr = dev->db_addr +
954 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
955 		cq->db.data.icid = cq->icid;
956 		cq->db.data.params = DB_AGG_CMD_SET <<
957 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
958 
959 		/* point to the very last element, passing it we will toggle */
960 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
961 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
962 		cq->latest_cqe = NULL;
963 		consume_cqe(cq);
964 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
965 	}
966 
967 	DP_DEBUG(dev, QEDR_MSG_CQ,
968 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
969 		 cq->icid, cq, params.cq_size);
970 
971 	return &cq->ibcq;
972 
973 err3:
974 	destroy_iparams.icid = cq->icid;
975 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
976 				  &destroy_oparams);
977 err2:
978 	if (udata)
979 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
980 	else
981 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
982 err1:
983 	if (udata)
984 		ib_umem_release(cq->q.umem);
985 err0:
986 	kfree(cq);
987 	return ERR_PTR(-EINVAL);
988 }
989 
990 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
991 {
992 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
993 	struct qedr_cq *cq = get_qedr_cq(ibcq);
994 
995 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
996 
997 	return 0;
998 }
999 
1000 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1001 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1002 
1003 int qedr_destroy_cq(struct ib_cq *ibcq)
1004 {
1005 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1006 	struct qed_rdma_destroy_cq_out_params oparams;
1007 	struct qed_rdma_destroy_cq_in_params iparams;
1008 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1009 	int iter;
1010 	int rc;
1011 
1012 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1013 
1014 	cq->destroyed = 1;
1015 
1016 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
1017 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1018 		goto done;
1019 
1020 	iparams.icid = cq->icid;
1021 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1022 	if (rc)
1023 		return rc;
1024 
1025 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1026 
1027 	if (ibcq->uobject && ibcq->uobject->context) {
1028 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1029 		ib_umem_release(cq->q.umem);
1030 	}
1031 
1032 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1033 	 * wait until all CNQ interrupts, if any, are received. This will always
1034 	 * happen and will always happen very fast. If not, then a serious error
1035 	 * has occured. That is why we can use a long delay.
1036 	 * We spin for a short time so we don’t lose time on context switching
1037 	 * in case all the completions are handled in that span. Otherwise
1038 	 * we sleep for a while and check again. Since the CNQ may be
1039 	 * associated with (only) the current CPU we use msleep to allow the
1040 	 * current CPU to be freed.
1041 	 * The CNQ notification is increased in qedr_irq_handler().
1042 	 */
1043 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1044 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1045 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1046 		iter--;
1047 	}
1048 
1049 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1050 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1051 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1052 		iter--;
1053 	}
1054 
1055 	if (oparams.num_cq_notif != cq->cnq_notif)
1056 		goto err;
1057 
1058 	/* Note that we don't need to have explicit code to wait for the
1059 	 * completion of the event handler because it is invoked from the EQ.
1060 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1061 	 * be certain that there's no event handler in process.
1062 	 */
1063 done:
1064 	cq->sig = ~cq->sig;
1065 
1066 	kfree(cq);
1067 
1068 	return 0;
1069 
1070 err:
1071 	DP_ERR(dev,
1072 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1073 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1074 
1075 	return -EINVAL;
1076 }
1077 
1078 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1079 					  struct ib_qp_attr *attr,
1080 					  int attr_mask,
1081 					  struct qed_rdma_modify_qp_in_params
1082 					  *qp_params)
1083 {
1084 	const struct ib_gid_attr *gid_attr;
1085 	enum rdma_network_type nw_type;
1086 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1087 	u32 ipv4_addr;
1088 	int i;
1089 
1090 	gid_attr = grh->sgid_attr;
1091 	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev);
1092 
1093 	nw_type = rdma_gid_attr_network_type(gid_attr);
1094 	switch (nw_type) {
1095 	case RDMA_NETWORK_IPV6:
1096 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1097 		       sizeof(qp_params->sgid));
1098 		memcpy(&qp_params->dgid.bytes[0],
1099 		       &grh->dgid,
1100 		       sizeof(qp_params->dgid));
1101 		qp_params->roce_mode = ROCE_V2_IPV6;
1102 		SET_FIELD(qp_params->modify_flags,
1103 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1104 		break;
1105 	case RDMA_NETWORK_IB:
1106 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1107 		       sizeof(qp_params->sgid));
1108 		memcpy(&qp_params->dgid.bytes[0],
1109 		       &grh->dgid,
1110 		       sizeof(qp_params->dgid));
1111 		qp_params->roce_mode = ROCE_V1;
1112 		break;
1113 	case RDMA_NETWORK_IPV4:
1114 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1115 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1116 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1117 		qp_params->sgid.ipv4_addr = ipv4_addr;
1118 		ipv4_addr =
1119 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1120 		qp_params->dgid.ipv4_addr = ipv4_addr;
1121 		SET_FIELD(qp_params->modify_flags,
1122 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1123 		qp_params->roce_mode = ROCE_V2_IPV4;
1124 		break;
1125 	}
1126 
1127 	for (i = 0; i < 4; i++) {
1128 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1129 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1130 	}
1131 
1132 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1133 		qp_params->vlan_id = 0;
1134 
1135 	return 0;
1136 }
1137 
1138 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1139 			       struct ib_qp_init_attr *attrs,
1140 			       struct ib_udata *udata)
1141 {
1142 	struct qedr_device_attr *qattr = &dev->attr;
1143 
1144 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1145 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1146 		DP_DEBUG(dev, QEDR_MSG_QP,
1147 			 "create qp: unsupported qp type=0x%x requested\n",
1148 			 attrs->qp_type);
1149 		return -EINVAL;
1150 	}
1151 
1152 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1153 		DP_ERR(dev,
1154 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1155 		       attrs->cap.max_send_wr, qattr->max_sqe);
1156 		return -EINVAL;
1157 	}
1158 
1159 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1160 		DP_ERR(dev,
1161 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1162 		       attrs->cap.max_inline_data, qattr->max_inline);
1163 		return -EINVAL;
1164 	}
1165 
1166 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1167 		DP_ERR(dev,
1168 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1169 		       attrs->cap.max_send_sge, qattr->max_sge);
1170 		return -EINVAL;
1171 	}
1172 
1173 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1174 		DP_ERR(dev,
1175 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1176 		       attrs->cap.max_recv_sge, qattr->max_sge);
1177 		return -EINVAL;
1178 	}
1179 
1180 	/* Unprivileged user space cannot create special QP */
1181 	if (udata && attrs->qp_type == IB_QPT_GSI) {
1182 		DP_ERR(dev,
1183 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1184 		       attrs->qp_type);
1185 		return -EINVAL;
1186 	}
1187 
1188 	return 0;
1189 }
1190 
1191 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1192 			       struct qedr_srq *srq, struct ib_udata *udata)
1193 {
1194 	struct qedr_create_srq_uresp uresp = {};
1195 	int rc;
1196 
1197 	uresp.srq_id = srq->srq_id;
1198 
1199 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1200 	if (rc)
1201 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1202 
1203 	return rc;
1204 }
1205 
1206 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1207 			       struct qedr_create_qp_uresp *uresp,
1208 			       struct qedr_qp *qp)
1209 {
1210 	/* iWARP requires two doorbells per RQ. */
1211 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1212 		uresp->rq_db_offset =
1213 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1214 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1215 	} else {
1216 		uresp->rq_db_offset =
1217 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1218 	}
1219 
1220 	uresp->rq_icid = qp->icid;
1221 }
1222 
1223 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1224 			       struct qedr_create_qp_uresp *uresp,
1225 			       struct qedr_qp *qp)
1226 {
1227 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1228 
1229 	/* iWARP uses the same cid for rq and sq */
1230 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1231 		uresp->sq_icid = qp->icid;
1232 	else
1233 		uresp->sq_icid = qp->icid + 1;
1234 }
1235 
1236 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1237 			      struct qedr_qp *qp, struct ib_udata *udata)
1238 {
1239 	struct qedr_create_qp_uresp uresp;
1240 	int rc;
1241 
1242 	memset(&uresp, 0, sizeof(uresp));
1243 	qedr_copy_sq_uresp(dev, &uresp, qp);
1244 	qedr_copy_rq_uresp(dev, &uresp, qp);
1245 
1246 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1247 	uresp.qp_id = qp->qp_id;
1248 
1249 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1250 	if (rc)
1251 		DP_ERR(dev,
1252 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1253 		       qp->icid);
1254 
1255 	return rc;
1256 }
1257 
1258 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1259 				      struct qedr_qp *qp,
1260 				      struct qedr_pd *pd,
1261 				      struct ib_qp_init_attr *attrs)
1262 {
1263 	spin_lock_init(&qp->q_lock);
1264 	atomic_set(&qp->refcnt, 1);
1265 	qp->pd = pd;
1266 	qp->qp_type = attrs->qp_type;
1267 	qp->max_inline_data = attrs->cap.max_inline_data;
1268 	qp->sq.max_sges = attrs->cap.max_send_sge;
1269 	qp->state = QED_ROCE_QP_STATE_RESET;
1270 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1271 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1272 	qp->dev = dev;
1273 
1274 	if (attrs->srq) {
1275 		qp->srq = get_qedr_srq(attrs->srq);
1276 	} else {
1277 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1278 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1279 		DP_DEBUG(dev, QEDR_MSG_QP,
1280 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1281 			 qp->rq.max_sges, qp->rq_cq->icid);
1282 	}
1283 
1284 	DP_DEBUG(dev, QEDR_MSG_QP,
1285 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1286 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1287 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1288 	DP_DEBUG(dev, QEDR_MSG_QP,
1289 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1290 		 qp->sq.max_sges, qp->sq_cq->icid);
1291 }
1292 
1293 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1294 {
1295 	qp->sq.db = dev->db_addr +
1296 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1297 	qp->sq.db_data.data.icid = qp->icid + 1;
1298 	if (!qp->srq) {
1299 		qp->rq.db = dev->db_addr +
1300 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1301 		qp->rq.db_data.data.icid = qp->icid;
1302 	}
1303 }
1304 
1305 static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev,
1306 				 struct ib_srq_init_attr *attrs,
1307 				 struct ib_udata *udata)
1308 {
1309 	struct qedr_device_attr *qattr = &dev->attr;
1310 
1311 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1312 		DP_ERR(dev,
1313 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1314 		       attrs->attr.max_wr, qattr->max_srq_wr);
1315 		return -EINVAL;
1316 	}
1317 
1318 	if (attrs->attr.max_sge > qattr->max_sge) {
1319 		DP_ERR(dev,
1320 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1321 		       attrs->attr.max_sge, qattr->max_sge);
1322 		return -EINVAL;
1323 	}
1324 
1325 	return 0;
1326 }
1327 
1328 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1329 {
1330 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1331 	ib_umem_release(srq->usrq.umem);
1332 	ib_umem_release(srq->prod_umem);
1333 }
1334 
1335 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1336 {
1337 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1338 	struct qedr_dev *dev = srq->dev;
1339 
1340 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1341 
1342 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1343 			  hw_srq->virt_prod_pair_addr,
1344 			  hw_srq->phy_prod_pair_addr);
1345 }
1346 
1347 static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx,
1348 				     struct qedr_srq *srq,
1349 				     struct qedr_create_srq_ureq *ureq,
1350 				     int access, int dmasync)
1351 {
1352 	struct scatterlist *sg;
1353 	int rc;
1354 
1355 	rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr,
1356 				  ureq->srq_len, access, dmasync, 1);
1357 	if (rc)
1358 		return rc;
1359 
1360 	srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr,
1361 				     sizeof(struct rdma_srq_producers),
1362 				     access, dmasync);
1363 	if (IS_ERR(srq->prod_umem)) {
1364 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1365 		ib_umem_release(srq->usrq.umem);
1366 		DP_ERR(srq->dev,
1367 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1368 		       PTR_ERR(srq->prod_umem));
1369 		return PTR_ERR(srq->prod_umem);
1370 	}
1371 
1372 	sg = srq->prod_umem->sg_head.sgl;
1373 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1374 
1375 	return 0;
1376 }
1377 
1378 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1379 					struct qedr_dev *dev,
1380 					struct ib_srq_init_attr *init_attr)
1381 {
1382 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1383 	dma_addr_t phy_prod_pair_addr;
1384 	u32 num_elems;
1385 	void *va;
1386 	int rc;
1387 
1388 	va = dma_alloc_coherent(&dev->pdev->dev,
1389 				sizeof(struct rdma_srq_producers),
1390 				&phy_prod_pair_addr, GFP_KERNEL);
1391 	if (!va) {
1392 		DP_ERR(dev,
1393 		       "create srq: failed to allocate dma memory for producer\n");
1394 		return -ENOMEM;
1395 	}
1396 
1397 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1398 	hw_srq->virt_prod_pair_addr = va;
1399 
1400 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1401 	rc = dev->ops->common->chain_alloc(dev->cdev,
1402 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1403 					   QED_CHAIN_MODE_PBL,
1404 					   QED_CHAIN_CNT_TYPE_U32,
1405 					   num_elems,
1406 					   QEDR_SRQ_WQE_ELEM_SIZE,
1407 					   &hw_srq->pbl, NULL);
1408 	if (rc)
1409 		goto err0;
1410 
1411 	hw_srq->num_elems = num_elems;
1412 
1413 	return 0;
1414 
1415 err0:
1416 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1417 			  va, phy_prod_pair_addr);
1418 	return rc;
1419 }
1420 
1421 static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
1422 			void *ptr, u32 id);
1423 static void qedr_idr_remove(struct qedr_dev *dev,
1424 			    struct qedr_idr *qidr, u32 id);
1425 
1426 struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
1427 			       struct ib_srq_init_attr *init_attr,
1428 			       struct ib_udata *udata)
1429 {
1430 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1431 	struct qed_rdma_create_srq_in_params in_params = {};
1432 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1433 	struct qed_rdma_create_srq_out_params out_params;
1434 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1435 	struct qedr_create_srq_ureq ureq = {};
1436 	u64 pbl_base_addr, phy_prod_pair_addr;
1437 	struct ib_ucontext *ib_ctx = NULL;
1438 	struct qedr_srq_hwq_info *hw_srq;
1439 	u32 page_cnt, page_size;
1440 	struct qedr_srq *srq;
1441 	int rc = 0;
1442 
1443 	DP_DEBUG(dev, QEDR_MSG_QP,
1444 		 "create SRQ called from %s (pd %p)\n",
1445 		 (udata) ? "User lib" : "kernel", pd);
1446 
1447 	rc = qedr_check_srq_params(ibpd, dev, init_attr, udata);
1448 	if (rc)
1449 		return ERR_PTR(-EINVAL);
1450 
1451 	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1452 	if (!srq)
1453 		return ERR_PTR(-ENOMEM);
1454 
1455 	srq->dev = dev;
1456 	hw_srq = &srq->hw_srq;
1457 	spin_lock_init(&srq->lock);
1458 
1459 	hw_srq->max_wr = init_attr->attr.max_wr;
1460 	hw_srq->max_sges = init_attr->attr.max_sge;
1461 
1462 	if (udata && ibpd->uobject && ibpd->uobject->context) {
1463 		ib_ctx = ibpd->uobject->context;
1464 
1465 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
1466 			DP_ERR(dev,
1467 			       "create srq: problem copying data from user space\n");
1468 			goto err0;
1469 		}
1470 
1471 		rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0);
1472 		if (rc)
1473 			goto err0;
1474 
1475 		page_cnt = srq->usrq.pbl_info.num_pbes;
1476 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1477 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1478 		page_size = BIT(srq->usrq.umem->page_shift);
1479 	} else {
1480 		struct qed_chain *pbl;
1481 
1482 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1483 		if (rc)
1484 			goto err0;
1485 
1486 		pbl = &hw_srq->pbl;
1487 		page_cnt = qed_chain_get_page_cnt(pbl);
1488 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1489 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1490 		page_size = QED_CHAIN_PAGE_SIZE;
1491 	}
1492 
1493 	in_params.pd_id = pd->pd_id;
1494 	in_params.pbl_base_addr = pbl_base_addr;
1495 	in_params.prod_pair_addr = phy_prod_pair_addr;
1496 	in_params.num_pages = page_cnt;
1497 	in_params.page_size = page_size;
1498 
1499 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1500 	if (rc)
1501 		goto err1;
1502 
1503 	srq->srq_id = out_params.srq_id;
1504 
1505 	if (udata) {
1506 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1507 		if (rc)
1508 			goto err2;
1509 	}
1510 
1511 	rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id);
1512 	if (rc)
1513 		goto err2;
1514 
1515 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1516 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1517 	return &srq->ibsrq;
1518 
1519 err2:
1520 	destroy_in_params.srq_id = srq->srq_id;
1521 
1522 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1523 err1:
1524 	if (udata)
1525 		qedr_free_srq_user_params(srq);
1526 	else
1527 		qedr_free_srq_kernel_params(srq);
1528 err0:
1529 	kfree(srq);
1530 
1531 	return ERR_PTR(-EFAULT);
1532 }
1533 
1534 int qedr_destroy_srq(struct ib_srq *ibsrq)
1535 {
1536 	struct qed_rdma_destroy_srq_in_params in_params = {};
1537 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1538 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1539 
1540 	qedr_idr_remove(dev, &dev->srqidr, srq->srq_id);
1541 	in_params.srq_id = srq->srq_id;
1542 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1543 
1544 	if (ibsrq->uobject)
1545 		qedr_free_srq_user_params(srq);
1546 	else
1547 		qedr_free_srq_kernel_params(srq);
1548 
1549 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1550 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1551 		 srq->srq_id);
1552 	kfree(srq);
1553 
1554 	return 0;
1555 }
1556 
1557 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1558 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1559 {
1560 	struct qed_rdma_modify_srq_in_params in_params = {};
1561 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1562 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1563 	int rc;
1564 
1565 	if (attr_mask & IB_SRQ_MAX_WR) {
1566 		DP_ERR(dev,
1567 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1568 		       attr_mask, srq);
1569 		return -EINVAL;
1570 	}
1571 
1572 	if (attr_mask & IB_SRQ_LIMIT) {
1573 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1574 			DP_ERR(dev,
1575 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1576 			       attr->srq_limit, srq->hw_srq.max_wr);
1577 			return -EINVAL;
1578 		}
1579 
1580 		in_params.srq_id = srq->srq_id;
1581 		in_params.wqe_limit = attr->srq_limit;
1582 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1583 		if (rc)
1584 			return rc;
1585 	}
1586 
1587 	srq->srq_limit = attr->srq_limit;
1588 
1589 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1590 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1591 
1592 	return 0;
1593 }
1594 
1595 static inline void
1596 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1597 			      struct qedr_pd *pd,
1598 			      struct qedr_qp *qp,
1599 			      struct ib_qp_init_attr *attrs,
1600 			      bool fmr_and_reserved_lkey,
1601 			      struct qed_rdma_create_qp_in_params *params)
1602 {
1603 	/* QP handle to be written in an async event */
1604 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1605 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1606 
1607 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1608 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1609 	params->pd = pd->pd_id;
1610 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1611 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1612 	params->stats_queue = 0;
1613 	params->srq_id = 0;
1614 	params->use_srq = false;
1615 
1616 	if (!qp->srq) {
1617 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1618 
1619 	} else {
1620 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1621 		params->srq_id = qp->srq->srq_id;
1622 		params->use_srq = true;
1623 	}
1624 }
1625 
1626 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1627 {
1628 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1629 		 "qp=%p. "
1630 		 "sq_addr=0x%llx, "
1631 		 "sq_len=%zd, "
1632 		 "rq_addr=0x%llx, "
1633 		 "rq_len=%zd"
1634 		 "\n",
1635 		 qp,
1636 		 qp->usq.buf_addr,
1637 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1638 }
1639 
1640 static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
1641 			void *ptr, u32 id)
1642 {
1643 	int rc;
1644 
1645 	idr_preload(GFP_KERNEL);
1646 	spin_lock_irq(&qidr->idr_lock);
1647 
1648 	rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC);
1649 
1650 	spin_unlock_irq(&qidr->idr_lock);
1651 	idr_preload_end();
1652 
1653 	return rc < 0 ? rc : 0;
1654 }
1655 
1656 static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id)
1657 {
1658 	spin_lock_irq(&qidr->idr_lock);
1659 	idr_remove(&qidr->idr, id);
1660 	spin_unlock_irq(&qidr->idr_lock);
1661 }
1662 
1663 static inline void
1664 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1665 			    struct qedr_qp *qp,
1666 			    struct qed_rdma_create_qp_out_params *out_params)
1667 {
1668 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1669 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1670 
1671 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1672 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1673 	if (!qp->srq) {
1674 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1675 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1676 	}
1677 
1678 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1679 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1680 }
1681 
1682 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1683 {
1684 	if (qp->usq.umem)
1685 		ib_umem_release(qp->usq.umem);
1686 	qp->usq.umem = NULL;
1687 
1688 	if (qp->urq.umem)
1689 		ib_umem_release(qp->urq.umem);
1690 	qp->urq.umem = NULL;
1691 }
1692 
1693 static int qedr_create_user_qp(struct qedr_dev *dev,
1694 			       struct qedr_qp *qp,
1695 			       struct ib_pd *ibpd,
1696 			       struct ib_udata *udata,
1697 			       struct ib_qp_init_attr *attrs)
1698 {
1699 	struct qed_rdma_create_qp_in_params in_params;
1700 	struct qed_rdma_create_qp_out_params out_params;
1701 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1702 	struct ib_ucontext *ib_ctx = NULL;
1703 	struct qedr_create_qp_ureq ureq;
1704 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1705 	int rc = -EINVAL;
1706 
1707 	ib_ctx = ibpd->uobject->context;
1708 
1709 	memset(&ureq, 0, sizeof(ureq));
1710 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1711 	if (rc) {
1712 		DP_ERR(dev, "Problem copying data from user space\n");
1713 		return rc;
1714 	}
1715 
1716 	/* SQ - read access only (0), dma sync not required (0) */
1717 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1718 				  ureq.sq_len, 0, 0, alloc_and_init);
1719 	if (rc)
1720 		return rc;
1721 
1722 	if (!qp->srq) {
1723 		/* RQ - read access only (0), dma sync not required (0) */
1724 		rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1725 					  ureq.rq_len, 0, 0, alloc_and_init);
1726 		if (rc)
1727 			return rc;
1728 	}
1729 
1730 	memset(&in_params, 0, sizeof(in_params));
1731 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1732 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1733 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1734 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1735 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1736 	if (!qp->srq) {
1737 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1738 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1739 	}
1740 
1741 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1742 					      &in_params, &out_params);
1743 
1744 	if (!qp->qed_qp) {
1745 		rc = -ENOMEM;
1746 		goto err1;
1747 	}
1748 
1749 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1750 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1751 
1752 	qp->qp_id = out_params.qp_id;
1753 	qp->icid = out_params.icid;
1754 
1755 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1756 	if (rc)
1757 		goto err;
1758 
1759 	qedr_qp_user_print(dev, qp);
1760 
1761 	return 0;
1762 err:
1763 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1764 	if (rc)
1765 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1766 
1767 err1:
1768 	qedr_cleanup_user(dev, qp);
1769 	return rc;
1770 }
1771 
1772 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1773 {
1774 	qp->sq.db = dev->db_addr +
1775 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1776 	qp->sq.db_data.data.icid = qp->icid;
1777 
1778 	qp->rq.db = dev->db_addr +
1779 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1780 	qp->rq.db_data.data.icid = qp->icid;
1781 	qp->rq.iwarp_db2 = dev->db_addr +
1782 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1783 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1784 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1785 }
1786 
1787 static int
1788 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1789 			   struct qedr_qp *qp,
1790 			   struct qed_rdma_create_qp_in_params *in_params,
1791 			   u32 n_sq_elems, u32 n_rq_elems)
1792 {
1793 	struct qed_rdma_create_qp_out_params out_params;
1794 	int rc;
1795 
1796 	rc = dev->ops->common->chain_alloc(dev->cdev,
1797 					   QED_CHAIN_USE_TO_PRODUCE,
1798 					   QED_CHAIN_MODE_PBL,
1799 					   QED_CHAIN_CNT_TYPE_U32,
1800 					   n_sq_elems,
1801 					   QEDR_SQE_ELEMENT_SIZE,
1802 					   &qp->sq.pbl, NULL);
1803 
1804 	if (rc)
1805 		return rc;
1806 
1807 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1808 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1809 
1810 	rc = dev->ops->common->chain_alloc(dev->cdev,
1811 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1812 					   QED_CHAIN_MODE_PBL,
1813 					   QED_CHAIN_CNT_TYPE_U32,
1814 					   n_rq_elems,
1815 					   QEDR_RQE_ELEMENT_SIZE,
1816 					   &qp->rq.pbl, NULL);
1817 	if (rc)
1818 		return rc;
1819 
1820 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1821 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1822 
1823 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1824 					      in_params, &out_params);
1825 
1826 	if (!qp->qed_qp)
1827 		return -EINVAL;
1828 
1829 	qp->qp_id = out_params.qp_id;
1830 	qp->icid = out_params.icid;
1831 
1832 	qedr_set_roce_db_info(dev, qp);
1833 	return rc;
1834 }
1835 
1836 static int
1837 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1838 			    struct qedr_qp *qp,
1839 			    struct qed_rdma_create_qp_in_params *in_params,
1840 			    u32 n_sq_elems, u32 n_rq_elems)
1841 {
1842 	struct qed_rdma_create_qp_out_params out_params;
1843 	struct qed_chain_ext_pbl ext_pbl;
1844 	int rc;
1845 
1846 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1847 						     QEDR_SQE_ELEMENT_SIZE,
1848 						     QED_CHAIN_MODE_PBL);
1849 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1850 						     QEDR_RQE_ELEMENT_SIZE,
1851 						     QED_CHAIN_MODE_PBL);
1852 
1853 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1854 					      in_params, &out_params);
1855 
1856 	if (!qp->qed_qp)
1857 		return -EINVAL;
1858 
1859 	/* Now we allocate the chain */
1860 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1861 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1862 
1863 	rc = dev->ops->common->chain_alloc(dev->cdev,
1864 					   QED_CHAIN_USE_TO_PRODUCE,
1865 					   QED_CHAIN_MODE_PBL,
1866 					   QED_CHAIN_CNT_TYPE_U32,
1867 					   n_sq_elems,
1868 					   QEDR_SQE_ELEMENT_SIZE,
1869 					   &qp->sq.pbl, &ext_pbl);
1870 
1871 	if (rc)
1872 		goto err;
1873 
1874 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1875 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1876 
1877 	rc = dev->ops->common->chain_alloc(dev->cdev,
1878 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1879 					   QED_CHAIN_MODE_PBL,
1880 					   QED_CHAIN_CNT_TYPE_U32,
1881 					   n_rq_elems,
1882 					   QEDR_RQE_ELEMENT_SIZE,
1883 					   &qp->rq.pbl, &ext_pbl);
1884 
1885 	if (rc)
1886 		goto err;
1887 
1888 	qp->qp_id = out_params.qp_id;
1889 	qp->icid = out_params.icid;
1890 
1891 	qedr_set_iwarp_db_info(dev, qp);
1892 	return rc;
1893 
1894 err:
1895 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1896 
1897 	return rc;
1898 }
1899 
1900 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1901 {
1902 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1903 	kfree(qp->wqe_wr_id);
1904 
1905 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1906 	kfree(qp->rqe_wr_id);
1907 }
1908 
1909 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1910 				 struct qedr_qp *qp,
1911 				 struct ib_pd *ibpd,
1912 				 struct ib_qp_init_attr *attrs)
1913 {
1914 	struct qed_rdma_create_qp_in_params in_params;
1915 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1916 	int rc = -EINVAL;
1917 	u32 n_rq_elems;
1918 	u32 n_sq_elems;
1919 	u32 n_sq_entries;
1920 
1921 	memset(&in_params, 0, sizeof(in_params));
1922 
1923 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1924 	 * the ring. The ring should allow at least a single WR, even if the
1925 	 * user requested none, due to allocation issues.
1926 	 * We should add an extra WR since the prod and cons indices of
1927 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1928 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1929 	 * double the number of entries due an iSER issue that pushes far more
1930 	 * WRs than indicated. If we decline its ib_post_send() then we get
1931 	 * error prints in the dmesg we'd like to avoid.
1932 	 */
1933 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1934 			      dev->attr.max_sqe);
1935 
1936 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1937 				GFP_KERNEL);
1938 	if (!qp->wqe_wr_id) {
1939 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1940 		return -ENOMEM;
1941 	}
1942 
1943 	/* QP handle to be written in CQE */
1944 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1945 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1946 
1947 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1948 	 * the ring. There ring should allow at least a single WR, even if the
1949 	 * user requested none, due to allocation issues.
1950 	 */
1951 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1952 
1953 	/* Allocate driver internal RQ array */
1954 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1955 				GFP_KERNEL);
1956 	if (!qp->rqe_wr_id) {
1957 		DP_ERR(dev,
1958 		       "create qp: failed RQ shadow memory allocation\n");
1959 		kfree(qp->wqe_wr_id);
1960 		return -ENOMEM;
1961 	}
1962 
1963 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1964 
1965 	n_sq_entries = attrs->cap.max_send_wr;
1966 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1967 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1968 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1969 
1970 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1971 
1972 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1973 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1974 						 n_sq_elems, n_rq_elems);
1975 	else
1976 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1977 						n_sq_elems, n_rq_elems);
1978 	if (rc)
1979 		qedr_cleanup_kernel(dev, qp);
1980 
1981 	return rc;
1982 }
1983 
1984 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1985 			     struct ib_qp_init_attr *attrs,
1986 			     struct ib_udata *udata)
1987 {
1988 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1989 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1990 	struct qedr_qp *qp;
1991 	struct ib_qp *ibqp;
1992 	int rc = 0;
1993 
1994 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1995 		 udata ? "user library" : "kernel", pd);
1996 
1997 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
1998 	if (rc)
1999 		return ERR_PTR(rc);
2000 
2001 	DP_DEBUG(dev, QEDR_MSG_QP,
2002 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2003 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2004 		 get_qedr_cq(attrs->send_cq),
2005 		 get_qedr_cq(attrs->send_cq)->icid,
2006 		 get_qedr_cq(attrs->recv_cq),
2007 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2008 
2009 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2010 	if (!qp) {
2011 		DP_ERR(dev, "create qp: failed allocating memory\n");
2012 		return ERR_PTR(-ENOMEM);
2013 	}
2014 
2015 	qedr_set_common_qp_params(dev, qp, pd, attrs);
2016 
2017 	if (attrs->qp_type == IB_QPT_GSI) {
2018 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
2019 		if (IS_ERR(ibqp))
2020 			kfree(qp);
2021 		return ibqp;
2022 	}
2023 
2024 	if (udata)
2025 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2026 	else
2027 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2028 
2029 	if (rc)
2030 		goto err;
2031 
2032 	qp->ibqp.qp_num = qp->qp_id;
2033 
2034 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2035 		rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id);
2036 		if (rc)
2037 			goto err;
2038 	}
2039 
2040 	return &qp->ibqp;
2041 
2042 err:
2043 	kfree(qp);
2044 
2045 	return ERR_PTR(-EFAULT);
2046 }
2047 
2048 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2049 {
2050 	switch (qp_state) {
2051 	case QED_ROCE_QP_STATE_RESET:
2052 		return IB_QPS_RESET;
2053 	case QED_ROCE_QP_STATE_INIT:
2054 		return IB_QPS_INIT;
2055 	case QED_ROCE_QP_STATE_RTR:
2056 		return IB_QPS_RTR;
2057 	case QED_ROCE_QP_STATE_RTS:
2058 		return IB_QPS_RTS;
2059 	case QED_ROCE_QP_STATE_SQD:
2060 		return IB_QPS_SQD;
2061 	case QED_ROCE_QP_STATE_ERR:
2062 		return IB_QPS_ERR;
2063 	case QED_ROCE_QP_STATE_SQE:
2064 		return IB_QPS_SQE;
2065 	}
2066 	return IB_QPS_ERR;
2067 }
2068 
2069 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2070 					enum ib_qp_state qp_state)
2071 {
2072 	switch (qp_state) {
2073 	case IB_QPS_RESET:
2074 		return QED_ROCE_QP_STATE_RESET;
2075 	case IB_QPS_INIT:
2076 		return QED_ROCE_QP_STATE_INIT;
2077 	case IB_QPS_RTR:
2078 		return QED_ROCE_QP_STATE_RTR;
2079 	case IB_QPS_RTS:
2080 		return QED_ROCE_QP_STATE_RTS;
2081 	case IB_QPS_SQD:
2082 		return QED_ROCE_QP_STATE_SQD;
2083 	case IB_QPS_ERR:
2084 		return QED_ROCE_QP_STATE_ERR;
2085 	default:
2086 		return QED_ROCE_QP_STATE_ERR;
2087 	}
2088 }
2089 
2090 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2091 {
2092 	qed_chain_reset(&qph->pbl);
2093 	qph->prod = 0;
2094 	qph->cons = 0;
2095 	qph->wqe_cons = 0;
2096 	qph->db_data.data.value = cpu_to_le16(0);
2097 }
2098 
2099 static int qedr_update_qp_state(struct qedr_dev *dev,
2100 				struct qedr_qp *qp,
2101 				enum qed_roce_qp_state cur_state,
2102 				enum qed_roce_qp_state new_state)
2103 {
2104 	int status = 0;
2105 
2106 	if (new_state == cur_state)
2107 		return 0;
2108 
2109 	switch (cur_state) {
2110 	case QED_ROCE_QP_STATE_RESET:
2111 		switch (new_state) {
2112 		case QED_ROCE_QP_STATE_INIT:
2113 			qp->prev_wqe_size = 0;
2114 			qedr_reset_qp_hwq_info(&qp->sq);
2115 			qedr_reset_qp_hwq_info(&qp->rq);
2116 			break;
2117 		default:
2118 			status = -EINVAL;
2119 			break;
2120 		};
2121 		break;
2122 	case QED_ROCE_QP_STATE_INIT:
2123 		switch (new_state) {
2124 		case QED_ROCE_QP_STATE_RTR:
2125 			/* Update doorbell (in case post_recv was
2126 			 * done before move to RTR)
2127 			 */
2128 
2129 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2130 				writel(qp->rq.db_data.raw, qp->rq.db);
2131 				/* Make sure write takes effect */
2132 				mmiowb();
2133 			}
2134 			break;
2135 		case QED_ROCE_QP_STATE_ERR:
2136 			break;
2137 		default:
2138 			/* Invalid state change. */
2139 			status = -EINVAL;
2140 			break;
2141 		};
2142 		break;
2143 	case QED_ROCE_QP_STATE_RTR:
2144 		/* RTR->XXX */
2145 		switch (new_state) {
2146 		case QED_ROCE_QP_STATE_RTS:
2147 			break;
2148 		case QED_ROCE_QP_STATE_ERR:
2149 			break;
2150 		default:
2151 			/* Invalid state change. */
2152 			status = -EINVAL;
2153 			break;
2154 		};
2155 		break;
2156 	case QED_ROCE_QP_STATE_RTS:
2157 		/* RTS->XXX */
2158 		switch (new_state) {
2159 		case QED_ROCE_QP_STATE_SQD:
2160 			break;
2161 		case QED_ROCE_QP_STATE_ERR:
2162 			break;
2163 		default:
2164 			/* Invalid state change. */
2165 			status = -EINVAL;
2166 			break;
2167 		};
2168 		break;
2169 	case QED_ROCE_QP_STATE_SQD:
2170 		/* SQD->XXX */
2171 		switch (new_state) {
2172 		case QED_ROCE_QP_STATE_RTS:
2173 		case QED_ROCE_QP_STATE_ERR:
2174 			break;
2175 		default:
2176 			/* Invalid state change. */
2177 			status = -EINVAL;
2178 			break;
2179 		};
2180 		break;
2181 	case QED_ROCE_QP_STATE_ERR:
2182 		/* ERR->XXX */
2183 		switch (new_state) {
2184 		case QED_ROCE_QP_STATE_RESET:
2185 			if ((qp->rq.prod != qp->rq.cons) ||
2186 			    (qp->sq.prod != qp->sq.cons)) {
2187 				DP_NOTICE(dev,
2188 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2189 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2190 					  qp->sq.cons);
2191 				status = -EINVAL;
2192 			}
2193 			break;
2194 		default:
2195 			status = -EINVAL;
2196 			break;
2197 		};
2198 		break;
2199 	default:
2200 		status = -EINVAL;
2201 		break;
2202 	};
2203 
2204 	return status;
2205 }
2206 
2207 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2208 		   int attr_mask, struct ib_udata *udata)
2209 {
2210 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2211 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2212 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2213 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2214 	enum ib_qp_state old_qp_state, new_qp_state;
2215 	enum qed_roce_qp_state cur_state;
2216 	int rc = 0;
2217 
2218 	DP_DEBUG(dev, QEDR_MSG_QP,
2219 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2220 		 attr->qp_state);
2221 
2222 	old_qp_state = qedr_get_ibqp_state(qp->state);
2223 	if (attr_mask & IB_QP_STATE)
2224 		new_qp_state = attr->qp_state;
2225 	else
2226 		new_qp_state = old_qp_state;
2227 
2228 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2229 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2230 					ibqp->qp_type, attr_mask)) {
2231 			DP_ERR(dev,
2232 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2233 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2234 			       attr_mask, qp->qp_id, ibqp->qp_type,
2235 			       old_qp_state, new_qp_state);
2236 			rc = -EINVAL;
2237 			goto err;
2238 		}
2239 	}
2240 
2241 	/* Translate the masks... */
2242 	if (attr_mask & IB_QP_STATE) {
2243 		SET_FIELD(qp_params.modify_flags,
2244 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2245 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2246 	}
2247 
2248 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2249 		qp_params.sqd_async = true;
2250 
2251 	if (attr_mask & IB_QP_PKEY_INDEX) {
2252 		SET_FIELD(qp_params.modify_flags,
2253 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2254 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2255 			rc = -EINVAL;
2256 			goto err;
2257 		}
2258 
2259 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2260 	}
2261 
2262 	if (attr_mask & IB_QP_QKEY)
2263 		qp->qkey = attr->qkey;
2264 
2265 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2266 		SET_FIELD(qp_params.modify_flags,
2267 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2268 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2269 						  IB_ACCESS_REMOTE_READ;
2270 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2271 						   IB_ACCESS_REMOTE_WRITE;
2272 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2273 					       IB_ACCESS_REMOTE_ATOMIC;
2274 	}
2275 
2276 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2277 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2278 			return -EINVAL;
2279 
2280 		if (attr_mask & IB_QP_PATH_MTU) {
2281 			if (attr->path_mtu < IB_MTU_256 ||
2282 			    attr->path_mtu > IB_MTU_4096) {
2283 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2284 				rc = -EINVAL;
2285 				goto err;
2286 			}
2287 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2288 				      ib_mtu_enum_to_int(iboe_get_mtu
2289 							 (dev->ndev->mtu)));
2290 		}
2291 
2292 		if (!qp->mtu) {
2293 			qp->mtu =
2294 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2295 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2296 		}
2297 
2298 		SET_FIELD(qp_params.modify_flags,
2299 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2300 
2301 		qp_params.traffic_class_tos = grh->traffic_class;
2302 		qp_params.flow_label = grh->flow_label;
2303 		qp_params.hop_limit_ttl = grh->hop_limit;
2304 
2305 		qp->sgid_idx = grh->sgid_index;
2306 
2307 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2308 		if (rc) {
2309 			DP_ERR(dev,
2310 			       "modify qp: problems with GID index %d (rc=%d)\n",
2311 			       grh->sgid_index, rc);
2312 			return rc;
2313 		}
2314 
2315 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2316 				   qp_params.remote_mac_addr);
2317 		if (rc)
2318 			return rc;
2319 
2320 		qp_params.use_local_mac = true;
2321 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2322 
2323 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2324 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2325 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2326 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2327 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2328 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2329 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2330 			 qp_params.remote_mac_addr);
2331 
2332 		qp_params.mtu = qp->mtu;
2333 		qp_params.lb_indication = false;
2334 	}
2335 
2336 	if (!qp_params.mtu) {
2337 		/* Stay with current MTU */
2338 		if (qp->mtu)
2339 			qp_params.mtu = qp->mtu;
2340 		else
2341 			qp_params.mtu =
2342 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2343 	}
2344 
2345 	if (attr_mask & IB_QP_TIMEOUT) {
2346 		SET_FIELD(qp_params.modify_flags,
2347 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2348 
2349 		/* The received timeout value is an exponent used like this:
2350 		 *    "12.7.34 LOCAL ACK TIMEOUT
2351 		 *    Value representing the transport (ACK) timeout for use by
2352 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2353 		 * The FW expects timeout in msec so we need to divide the usec
2354 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2355 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2356 		 * The value of zero means infinite so we use a 'max_t' to make
2357 		 * sure that sub 1 msec values will be configured as 1 msec.
2358 		 */
2359 		if (attr->timeout)
2360 			qp_params.ack_timeout =
2361 					1 << max_t(int, attr->timeout - 8, 0);
2362 		else
2363 			qp_params.ack_timeout = 0;
2364 	}
2365 
2366 	if (attr_mask & IB_QP_RETRY_CNT) {
2367 		SET_FIELD(qp_params.modify_flags,
2368 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2369 		qp_params.retry_cnt = attr->retry_cnt;
2370 	}
2371 
2372 	if (attr_mask & IB_QP_RNR_RETRY) {
2373 		SET_FIELD(qp_params.modify_flags,
2374 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2375 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2376 	}
2377 
2378 	if (attr_mask & IB_QP_RQ_PSN) {
2379 		SET_FIELD(qp_params.modify_flags,
2380 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2381 		qp_params.rq_psn = attr->rq_psn;
2382 		qp->rq_psn = attr->rq_psn;
2383 	}
2384 
2385 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2386 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2387 			rc = -EINVAL;
2388 			DP_ERR(dev,
2389 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2390 			       attr->max_rd_atomic,
2391 			       dev->attr.max_qp_req_rd_atomic_resc);
2392 			goto err;
2393 		}
2394 
2395 		SET_FIELD(qp_params.modify_flags,
2396 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2397 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2398 	}
2399 
2400 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2401 		SET_FIELD(qp_params.modify_flags,
2402 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2403 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2404 	}
2405 
2406 	if (attr_mask & IB_QP_SQ_PSN) {
2407 		SET_FIELD(qp_params.modify_flags,
2408 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2409 		qp_params.sq_psn = attr->sq_psn;
2410 		qp->sq_psn = attr->sq_psn;
2411 	}
2412 
2413 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2414 		if (attr->max_dest_rd_atomic >
2415 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2416 			DP_ERR(dev,
2417 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2418 			       attr->max_dest_rd_atomic,
2419 			       dev->attr.max_qp_resp_rd_atomic_resc);
2420 
2421 			rc = -EINVAL;
2422 			goto err;
2423 		}
2424 
2425 		SET_FIELD(qp_params.modify_flags,
2426 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2427 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2428 	}
2429 
2430 	if (attr_mask & IB_QP_DEST_QPN) {
2431 		SET_FIELD(qp_params.modify_flags,
2432 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2433 
2434 		qp_params.dest_qp = attr->dest_qp_num;
2435 		qp->dest_qp_num = attr->dest_qp_num;
2436 	}
2437 
2438 	cur_state = qp->state;
2439 
2440 	/* Update the QP state before the actual ramrod to prevent a race with
2441 	 * fast path. Modifying the QP state to error will cause the device to
2442 	 * flush the CQEs and while polling the flushed CQEs will considered as
2443 	 * a potential issue if the QP isn't in error state.
2444 	 */
2445 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2446 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2447 		qp->state = QED_ROCE_QP_STATE_ERR;
2448 
2449 	if (qp->qp_type != IB_QPT_GSI)
2450 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2451 					      qp->qed_qp, &qp_params);
2452 
2453 	if (attr_mask & IB_QP_STATE) {
2454 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2455 			rc = qedr_update_qp_state(dev, qp, cur_state,
2456 						  qp_params.new_state);
2457 		qp->state = qp_params.new_state;
2458 	}
2459 
2460 err:
2461 	return rc;
2462 }
2463 
2464 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2465 {
2466 	int ib_qp_acc_flags = 0;
2467 
2468 	if (params->incoming_rdma_write_en)
2469 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2470 	if (params->incoming_rdma_read_en)
2471 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2472 	if (params->incoming_atomic_en)
2473 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2474 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2475 	return ib_qp_acc_flags;
2476 }
2477 
2478 int qedr_query_qp(struct ib_qp *ibqp,
2479 		  struct ib_qp_attr *qp_attr,
2480 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2481 {
2482 	struct qed_rdma_query_qp_out_params params;
2483 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2484 	struct qedr_dev *dev = qp->dev;
2485 	int rc = 0;
2486 
2487 	memset(&params, 0, sizeof(params));
2488 
2489 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2490 	if (rc)
2491 		goto err;
2492 
2493 	memset(qp_attr, 0, sizeof(*qp_attr));
2494 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2495 
2496 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2497 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2498 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2499 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2500 	qp_attr->rq_psn = params.rq_psn;
2501 	qp_attr->sq_psn = params.sq_psn;
2502 	qp_attr->dest_qp_num = params.dest_qp;
2503 
2504 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2505 
2506 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2507 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2508 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2509 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2510 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2511 	qp_init_attr->cap = qp_attr->cap;
2512 
2513 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2514 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2515 			params.flow_label, qp->sgid_idx,
2516 			params.hop_limit_ttl, params.traffic_class_tos);
2517 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2518 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2519 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2520 	qp_attr->timeout = params.timeout;
2521 	qp_attr->rnr_retry = params.rnr_retry;
2522 	qp_attr->retry_cnt = params.retry_cnt;
2523 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2524 	qp_attr->pkey_index = params.pkey_index;
2525 	qp_attr->port_num = 1;
2526 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2527 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2528 	qp_attr->alt_pkey_index = 0;
2529 	qp_attr->alt_port_num = 0;
2530 	qp_attr->alt_timeout = 0;
2531 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2532 
2533 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2534 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2535 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2536 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2537 
2538 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2539 		 qp_attr->cap.max_inline_data);
2540 
2541 err:
2542 	return rc;
2543 }
2544 
2545 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2546 {
2547 	int rc = 0;
2548 
2549 	if (qp->qp_type != IB_QPT_GSI) {
2550 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2551 		if (rc)
2552 			return rc;
2553 	}
2554 
2555 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2556 		qedr_cleanup_user(dev, qp);
2557 	else
2558 		qedr_cleanup_kernel(dev, qp);
2559 
2560 	return 0;
2561 }
2562 
2563 int qedr_destroy_qp(struct ib_qp *ibqp)
2564 {
2565 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2566 	struct qedr_dev *dev = qp->dev;
2567 	struct ib_qp_attr attr;
2568 	int attr_mask = 0;
2569 	int rc = 0;
2570 
2571 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2572 		 qp, qp->qp_type);
2573 
2574 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2575 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2576 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2577 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2578 
2579 			attr.qp_state = IB_QPS_ERR;
2580 			attr_mask |= IB_QP_STATE;
2581 
2582 			/* Change the QP state to ERROR */
2583 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2584 		}
2585 	} else {
2586 		/* Wait for the connect/accept to complete */
2587 		if (qp->ep) {
2588 			int wait_count = 1;
2589 
2590 			while (qp->ep->during_connect) {
2591 				DP_DEBUG(dev, QEDR_MSG_QP,
2592 					 "Still in during connect/accept\n");
2593 
2594 				msleep(100);
2595 				if (wait_count++ > 200) {
2596 					DP_NOTICE(dev,
2597 						  "during connect timeout\n");
2598 					break;
2599 				}
2600 			}
2601 		}
2602 	}
2603 
2604 	if (qp->qp_type == IB_QPT_GSI)
2605 		qedr_destroy_gsi_qp(dev);
2606 
2607 	qedr_free_qp_resources(dev, qp);
2608 
2609 	if (atomic_dec_and_test(&qp->refcnt) &&
2610 	    rdma_protocol_iwarp(&dev->ibdev, 1)) {
2611 		qedr_idr_remove(dev, &dev->qpidr, qp->qp_id);
2612 		kfree(qp);
2613 	}
2614 	return rc;
2615 }
2616 
2617 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2618 			     u32 flags, struct ib_udata *udata)
2619 {
2620 	struct qedr_ah *ah;
2621 
2622 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2623 	if (!ah)
2624 		return ERR_PTR(-ENOMEM);
2625 
2626 	rdma_copy_ah_attr(&ah->attr, attr);
2627 
2628 	return &ah->ibah;
2629 }
2630 
2631 int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2632 {
2633 	struct qedr_ah *ah = get_qedr_ah(ibah);
2634 
2635 	rdma_destroy_ah_attr(&ah->attr);
2636 	kfree(ah);
2637 	return 0;
2638 }
2639 
2640 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2641 {
2642 	struct qedr_pbl *pbl, *tmp;
2643 
2644 	if (info->pbl_table)
2645 		list_add_tail(&info->pbl_table->list_entry,
2646 			      &info->free_pbl_list);
2647 
2648 	if (!list_empty(&info->inuse_pbl_list))
2649 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2650 
2651 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2652 		list_del(&pbl->list_entry);
2653 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2654 	}
2655 }
2656 
2657 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2658 			size_t page_list_len, bool two_layered)
2659 {
2660 	struct qedr_pbl *tmp;
2661 	int rc;
2662 
2663 	INIT_LIST_HEAD(&info->free_pbl_list);
2664 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2665 
2666 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2667 				  page_list_len, two_layered);
2668 	if (rc)
2669 		goto done;
2670 
2671 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2672 	if (IS_ERR(info->pbl_table)) {
2673 		rc = PTR_ERR(info->pbl_table);
2674 		goto done;
2675 	}
2676 
2677 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2678 		 &info->pbl_table->pa);
2679 
2680 	/* in usual case we use 2 PBLs, so we add one to free
2681 	 * list and allocating another one
2682 	 */
2683 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2684 	if (IS_ERR(tmp)) {
2685 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2686 		goto done;
2687 	}
2688 
2689 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2690 
2691 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2692 
2693 done:
2694 	if (rc)
2695 		free_mr_info(dev, info);
2696 
2697 	return rc;
2698 }
2699 
2700 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2701 			       u64 usr_addr, int acc, struct ib_udata *udata)
2702 {
2703 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2704 	struct qedr_mr *mr;
2705 	struct qedr_pd *pd;
2706 	int rc = -ENOMEM;
2707 
2708 	pd = get_qedr_pd(ibpd);
2709 	DP_DEBUG(dev, QEDR_MSG_MR,
2710 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2711 		 pd->pd_id, start, len, usr_addr, acc);
2712 
2713 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2714 		return ERR_PTR(-EINVAL);
2715 
2716 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2717 	if (!mr)
2718 		return ERR_PTR(rc);
2719 
2720 	mr->type = QEDR_MR_USER;
2721 
2722 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2723 	if (IS_ERR(mr->umem)) {
2724 		rc = -EFAULT;
2725 		goto err0;
2726 	}
2727 
2728 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2729 	if (rc)
2730 		goto err1;
2731 
2732 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2733 			   &mr->info.pbl_info, mr->umem->page_shift);
2734 
2735 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2736 	if (rc) {
2737 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2738 		goto err1;
2739 	}
2740 
2741 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2742 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2743 	mr->hw_mr.key = 0;
2744 	mr->hw_mr.pd = pd->pd_id;
2745 	mr->hw_mr.local_read = 1;
2746 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2747 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2748 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2749 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2750 	mr->hw_mr.mw_bind = false;
2751 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2752 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2753 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2754 	mr->hw_mr.page_size_log = mr->umem->page_shift;
2755 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2756 	mr->hw_mr.length = len;
2757 	mr->hw_mr.vaddr = usr_addr;
2758 	mr->hw_mr.zbva = false;
2759 	mr->hw_mr.phy_mr = false;
2760 	mr->hw_mr.dma_mr = false;
2761 
2762 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2763 	if (rc) {
2764 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2765 		goto err2;
2766 	}
2767 
2768 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2769 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2770 	    mr->hw_mr.remote_atomic)
2771 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2772 
2773 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2774 		 mr->ibmr.lkey);
2775 	return &mr->ibmr;
2776 
2777 err2:
2778 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2779 err1:
2780 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2781 err0:
2782 	kfree(mr);
2783 	return ERR_PTR(rc);
2784 }
2785 
2786 int qedr_dereg_mr(struct ib_mr *ib_mr)
2787 {
2788 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2789 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2790 	int rc = 0;
2791 
2792 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2793 	if (rc)
2794 		return rc;
2795 
2796 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2797 
2798 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2799 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2800 
2801 	/* it could be user registered memory. */
2802 	if (mr->umem)
2803 		ib_umem_release(mr->umem);
2804 
2805 	kfree(mr);
2806 
2807 	return rc;
2808 }
2809 
2810 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2811 				       int max_page_list_len)
2812 {
2813 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2814 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2815 	struct qedr_mr *mr;
2816 	int rc = -ENOMEM;
2817 
2818 	DP_DEBUG(dev, QEDR_MSG_MR,
2819 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2820 		 max_page_list_len);
2821 
2822 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2823 	if (!mr)
2824 		return ERR_PTR(rc);
2825 
2826 	mr->dev = dev;
2827 	mr->type = QEDR_MR_FRMR;
2828 
2829 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2830 	if (rc)
2831 		goto err0;
2832 
2833 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2834 	if (rc) {
2835 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2836 		goto err0;
2837 	}
2838 
2839 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2840 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2841 	mr->hw_mr.key = 0;
2842 	mr->hw_mr.pd = pd->pd_id;
2843 	mr->hw_mr.local_read = 1;
2844 	mr->hw_mr.local_write = 0;
2845 	mr->hw_mr.remote_read = 0;
2846 	mr->hw_mr.remote_write = 0;
2847 	mr->hw_mr.remote_atomic = 0;
2848 	mr->hw_mr.mw_bind = false;
2849 	mr->hw_mr.pbl_ptr = 0;
2850 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2851 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2852 	mr->hw_mr.fbo = 0;
2853 	mr->hw_mr.length = 0;
2854 	mr->hw_mr.vaddr = 0;
2855 	mr->hw_mr.zbva = false;
2856 	mr->hw_mr.phy_mr = true;
2857 	mr->hw_mr.dma_mr = false;
2858 
2859 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2860 	if (rc) {
2861 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2862 		goto err1;
2863 	}
2864 
2865 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2866 	mr->ibmr.rkey = mr->ibmr.lkey;
2867 
2868 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2869 	return mr;
2870 
2871 err1:
2872 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2873 err0:
2874 	kfree(mr);
2875 	return ERR_PTR(rc);
2876 }
2877 
2878 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2879 			    enum ib_mr_type mr_type, u32 max_num_sg)
2880 {
2881 	struct qedr_mr *mr;
2882 
2883 	if (mr_type != IB_MR_TYPE_MEM_REG)
2884 		return ERR_PTR(-EINVAL);
2885 
2886 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2887 
2888 	if (IS_ERR(mr))
2889 		return ERR_PTR(-EINVAL);
2890 
2891 	return &mr->ibmr;
2892 }
2893 
2894 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2895 {
2896 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2897 	struct qedr_pbl *pbl_table;
2898 	struct regpair *pbe;
2899 	u32 pbes_in_page;
2900 
2901 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2902 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2903 		return -ENOMEM;
2904 	}
2905 
2906 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2907 		 mr->npages, addr);
2908 
2909 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2910 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2911 	pbe = (struct regpair *)pbl_table->va;
2912 	pbe +=  mr->npages % pbes_in_page;
2913 	pbe->lo = cpu_to_le32((u32)addr);
2914 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2915 
2916 	mr->npages++;
2917 
2918 	return 0;
2919 }
2920 
2921 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2922 {
2923 	int work = info->completed - info->completed_handled - 1;
2924 
2925 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2926 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2927 		struct qedr_pbl *pbl;
2928 
2929 		/* Free all the page list that are possible to be freed
2930 		 * (all the ones that were invalidated), under the assumption
2931 		 * that if an FMR was completed successfully that means that
2932 		 * if there was an invalidate operation before it also ended
2933 		 */
2934 		pbl = list_first_entry(&info->inuse_pbl_list,
2935 				       struct qedr_pbl, list_entry);
2936 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2937 		info->completed_handled++;
2938 	}
2939 }
2940 
2941 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2942 		   int sg_nents, unsigned int *sg_offset)
2943 {
2944 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2945 
2946 	mr->npages = 0;
2947 
2948 	handle_completed_mrs(mr->dev, &mr->info);
2949 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2950 }
2951 
2952 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2953 {
2954 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2955 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2956 	struct qedr_mr *mr;
2957 	int rc;
2958 
2959 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2960 	if (!mr)
2961 		return ERR_PTR(-ENOMEM);
2962 
2963 	mr->type = QEDR_MR_DMA;
2964 
2965 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2966 	if (rc) {
2967 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2968 		goto err1;
2969 	}
2970 
2971 	/* index only, 18 bit long, lkey = itid << 8 | key */
2972 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2973 	mr->hw_mr.pd = pd->pd_id;
2974 	mr->hw_mr.local_read = 1;
2975 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2976 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2977 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2978 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2979 	mr->hw_mr.dma_mr = true;
2980 
2981 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2982 	if (rc) {
2983 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2984 		goto err2;
2985 	}
2986 
2987 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2988 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2989 	    mr->hw_mr.remote_atomic)
2990 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2991 
2992 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2993 	return &mr->ibmr;
2994 
2995 err2:
2996 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2997 err1:
2998 	kfree(mr);
2999 	return ERR_PTR(rc);
3000 }
3001 
3002 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3003 {
3004 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3005 }
3006 
3007 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3008 {
3009 	int i, len = 0;
3010 
3011 	for (i = 0; i < num_sge; i++)
3012 		len += sg_list[i].length;
3013 
3014 	return len;
3015 }
3016 
3017 static void swap_wqe_data64(u64 *p)
3018 {
3019 	int i;
3020 
3021 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3022 		*p = cpu_to_be64(cpu_to_le64(*p));
3023 }
3024 
3025 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3026 				       struct qedr_qp *qp, u8 *wqe_size,
3027 				       const struct ib_send_wr *wr,
3028 				       const struct ib_send_wr **bad_wr,
3029 				       u8 *bits, u8 bit)
3030 {
3031 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3032 	char *seg_prt, *wqe;
3033 	int i, seg_siz;
3034 
3035 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3036 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3037 		*bad_wr = wr;
3038 		return 0;
3039 	}
3040 
3041 	if (!data_size)
3042 		return data_size;
3043 
3044 	*bits |= bit;
3045 
3046 	seg_prt = NULL;
3047 	wqe = NULL;
3048 	seg_siz = 0;
3049 
3050 	/* Copy data inline */
3051 	for (i = 0; i < wr->num_sge; i++) {
3052 		u32 len = wr->sg_list[i].length;
3053 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3054 
3055 		while (len > 0) {
3056 			u32 cur;
3057 
3058 			/* New segment required */
3059 			if (!seg_siz) {
3060 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3061 				seg_prt = wqe;
3062 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3063 				(*wqe_size)++;
3064 			}
3065 
3066 			/* Calculate currently allowed length */
3067 			cur = min_t(u32, len, seg_siz);
3068 			memcpy(seg_prt, src, cur);
3069 
3070 			/* Update segment variables */
3071 			seg_prt += cur;
3072 			seg_siz -= cur;
3073 
3074 			/* Update sge variables */
3075 			src += cur;
3076 			len -= cur;
3077 
3078 			/* Swap fully-completed segments */
3079 			if (!seg_siz)
3080 				swap_wqe_data64((u64 *)wqe);
3081 		}
3082 	}
3083 
3084 	/* swap last not completed segment */
3085 	if (seg_siz)
3086 		swap_wqe_data64((u64 *)wqe);
3087 
3088 	return data_size;
3089 }
3090 
3091 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3092 	do {							\
3093 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3094 		(sge)->length = cpu_to_le32(vlength);		\
3095 		(sge)->flags = cpu_to_le32(vflags);		\
3096 	} while (0)
3097 
3098 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3099 	do {							\
3100 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3101 		(hdr)->num_sges = num_sge;			\
3102 	} while (0)
3103 
3104 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3105 	do {							\
3106 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3107 		(sge)->length = cpu_to_le32(vlength);		\
3108 		(sge)->l_key = cpu_to_le32(vlkey);		\
3109 	} while (0)
3110 
3111 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3112 				const struct ib_send_wr *wr)
3113 {
3114 	u32 data_size = 0;
3115 	int i;
3116 
3117 	for (i = 0; i < wr->num_sge; i++) {
3118 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3119 
3120 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3121 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3122 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3123 		data_size += wr->sg_list[i].length;
3124 	}
3125 
3126 	if (wqe_size)
3127 		*wqe_size += wr->num_sge;
3128 
3129 	return data_size;
3130 }
3131 
3132 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3133 				     struct qedr_qp *qp,
3134 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3135 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3136 				     const struct ib_send_wr *wr,
3137 				     const struct ib_send_wr **bad_wr)
3138 {
3139 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3140 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3141 
3142 	if (wr->send_flags & IB_SEND_INLINE &&
3143 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3144 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3145 		u8 flags = 0;
3146 
3147 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3148 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3149 						   bad_wr, &rwqe->flags, flags);
3150 	}
3151 
3152 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3153 }
3154 
3155 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3156 				     struct qedr_qp *qp,
3157 				     struct rdma_sq_send_wqe_1st *swqe,
3158 				     struct rdma_sq_send_wqe_2st *swqe2,
3159 				     const struct ib_send_wr *wr,
3160 				     const struct ib_send_wr **bad_wr)
3161 {
3162 	memset(swqe2, 0, sizeof(*swqe2));
3163 	if (wr->send_flags & IB_SEND_INLINE) {
3164 		u8 flags = 0;
3165 
3166 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3167 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3168 						   bad_wr, &swqe->flags, flags);
3169 	}
3170 
3171 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3172 }
3173 
3174 static int qedr_prepare_reg(struct qedr_qp *qp,
3175 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3176 			    const struct ib_reg_wr *wr)
3177 {
3178 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3179 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3180 
3181 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3182 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3183 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3184 	fwqe1->l_key = wr->key;
3185 
3186 	fwqe2->access_ctrl = 0;
3187 
3188 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3189 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3190 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3191 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3192 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3193 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3194 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3195 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3196 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3197 	fwqe2->fmr_ctrl = 0;
3198 
3199 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3200 		   ilog2(mr->ibmr.page_size) - 12);
3201 
3202 	fwqe2->length_hi = 0;
3203 	fwqe2->length_lo = mr->ibmr.length;
3204 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3205 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3206 
3207 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3208 
3209 	return 0;
3210 }
3211 
3212 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3213 {
3214 	switch (opcode) {
3215 	case IB_WR_RDMA_WRITE:
3216 	case IB_WR_RDMA_WRITE_WITH_IMM:
3217 		return IB_WC_RDMA_WRITE;
3218 	case IB_WR_SEND_WITH_IMM:
3219 	case IB_WR_SEND:
3220 	case IB_WR_SEND_WITH_INV:
3221 		return IB_WC_SEND;
3222 	case IB_WR_RDMA_READ:
3223 	case IB_WR_RDMA_READ_WITH_INV:
3224 		return IB_WC_RDMA_READ;
3225 	case IB_WR_ATOMIC_CMP_AND_SWP:
3226 		return IB_WC_COMP_SWAP;
3227 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3228 		return IB_WC_FETCH_ADD;
3229 	case IB_WR_REG_MR:
3230 		return IB_WC_REG_MR;
3231 	case IB_WR_LOCAL_INV:
3232 		return IB_WC_LOCAL_INV;
3233 	default:
3234 		return IB_WC_SEND;
3235 	}
3236 }
3237 
3238 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3239 				      const struct ib_send_wr *wr)
3240 {
3241 	int wq_is_full, err_wr, pbl_is_full;
3242 	struct qedr_dev *dev = qp->dev;
3243 
3244 	/* prevent SQ overflow and/or processing of a bad WR */
3245 	err_wr = wr->num_sge > qp->sq.max_sges;
3246 	wq_is_full = qedr_wq_is_full(&qp->sq);
3247 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3248 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3249 	if (wq_is_full || err_wr || pbl_is_full) {
3250 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3251 			DP_ERR(dev,
3252 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3253 			       qp);
3254 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3255 		}
3256 
3257 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3258 			DP_ERR(dev,
3259 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3260 			       qp);
3261 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3262 		}
3263 
3264 		if (pbl_is_full &&
3265 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3266 			DP_ERR(dev,
3267 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3268 			       qp);
3269 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3270 		}
3271 		return false;
3272 	}
3273 	return true;
3274 }
3275 
3276 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3277 			    const struct ib_send_wr **bad_wr)
3278 {
3279 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3280 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3281 	struct rdma_sq_atomic_wqe_1st *awqe1;
3282 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3283 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3284 	struct rdma_sq_send_wqe_2st *swqe2;
3285 	struct rdma_sq_local_inv_wqe *iwqe;
3286 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3287 	struct rdma_sq_send_wqe_1st *swqe;
3288 	struct rdma_sq_rdma_wqe_1st *rwqe;
3289 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3290 	struct rdma_sq_common_wqe *wqe;
3291 	u32 length;
3292 	int rc = 0;
3293 	bool comp;
3294 
3295 	if (!qedr_can_post_send(qp, wr)) {
3296 		*bad_wr = wr;
3297 		return -ENOMEM;
3298 	}
3299 
3300 	wqe = qed_chain_produce(&qp->sq.pbl);
3301 	qp->wqe_wr_id[qp->sq.prod].signaled =
3302 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3303 
3304 	wqe->flags = 0;
3305 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3306 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3307 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3308 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3309 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3310 		   !!(wr->send_flags & IB_SEND_FENCE));
3311 	wqe->prev_wqe_size = qp->prev_wqe_size;
3312 
3313 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3314 
3315 	switch (wr->opcode) {
3316 	case IB_WR_SEND_WITH_IMM:
3317 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3318 			rc = -EINVAL;
3319 			*bad_wr = wr;
3320 			break;
3321 		}
3322 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3323 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3324 		swqe->wqe_size = 2;
3325 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3326 
3327 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3328 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3329 						   wr, bad_wr);
3330 		swqe->length = cpu_to_le32(length);
3331 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3332 		qp->prev_wqe_size = swqe->wqe_size;
3333 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3334 		break;
3335 	case IB_WR_SEND:
3336 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3337 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3338 
3339 		swqe->wqe_size = 2;
3340 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3341 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3342 						   wr, bad_wr);
3343 		swqe->length = cpu_to_le32(length);
3344 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3345 		qp->prev_wqe_size = swqe->wqe_size;
3346 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3347 		break;
3348 	case IB_WR_SEND_WITH_INV:
3349 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3350 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3351 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3352 		swqe->wqe_size = 2;
3353 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3354 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3355 						   wr, bad_wr);
3356 		swqe->length = cpu_to_le32(length);
3357 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3358 		qp->prev_wqe_size = swqe->wqe_size;
3359 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3360 		break;
3361 
3362 	case IB_WR_RDMA_WRITE_WITH_IMM:
3363 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3364 			rc = -EINVAL;
3365 			*bad_wr = wr;
3366 			break;
3367 		}
3368 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3369 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3370 
3371 		rwqe->wqe_size = 2;
3372 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3373 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3374 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3375 						   wr, bad_wr);
3376 		rwqe->length = cpu_to_le32(length);
3377 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3378 		qp->prev_wqe_size = rwqe->wqe_size;
3379 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3380 		break;
3381 	case IB_WR_RDMA_WRITE:
3382 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3383 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3384 
3385 		rwqe->wqe_size = 2;
3386 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3387 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3388 						   wr, bad_wr);
3389 		rwqe->length = cpu_to_le32(length);
3390 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3391 		qp->prev_wqe_size = rwqe->wqe_size;
3392 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3393 		break;
3394 	case IB_WR_RDMA_READ_WITH_INV:
3395 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3396 		/* fallthrough -- same is identical to RDMA READ */
3397 
3398 	case IB_WR_RDMA_READ:
3399 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3400 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3401 
3402 		rwqe->wqe_size = 2;
3403 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3404 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3405 						   wr, bad_wr);
3406 		rwqe->length = cpu_to_le32(length);
3407 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3408 		qp->prev_wqe_size = rwqe->wqe_size;
3409 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3410 		break;
3411 
3412 	case IB_WR_ATOMIC_CMP_AND_SWP:
3413 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3414 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3415 		awqe1->wqe_size = 4;
3416 
3417 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3418 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3419 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3420 
3421 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3422 
3423 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3424 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3425 			DMA_REGPAIR_LE(awqe3->swap_data,
3426 				       atomic_wr(wr)->compare_add);
3427 		} else {
3428 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3429 			DMA_REGPAIR_LE(awqe3->swap_data,
3430 				       atomic_wr(wr)->swap);
3431 			DMA_REGPAIR_LE(awqe3->cmp_data,
3432 				       atomic_wr(wr)->compare_add);
3433 		}
3434 
3435 		qedr_prepare_sq_sges(qp, NULL, wr);
3436 
3437 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3438 		qp->prev_wqe_size = awqe1->wqe_size;
3439 		break;
3440 
3441 	case IB_WR_LOCAL_INV:
3442 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3443 		iwqe->wqe_size = 1;
3444 
3445 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3446 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3447 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3448 		qp->prev_wqe_size = iwqe->wqe_size;
3449 		break;
3450 	case IB_WR_REG_MR:
3451 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3452 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3453 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3454 		fwqe1->wqe_size = 2;
3455 
3456 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3457 		if (rc) {
3458 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3459 			*bad_wr = wr;
3460 			break;
3461 		}
3462 
3463 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3464 		qp->prev_wqe_size = fwqe1->wqe_size;
3465 		break;
3466 	default:
3467 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3468 		rc = -EINVAL;
3469 		*bad_wr = wr;
3470 		break;
3471 	}
3472 
3473 	if (*bad_wr) {
3474 		u16 value;
3475 
3476 		/* Restore prod to its position before
3477 		 * this WR was processed
3478 		 */
3479 		value = le16_to_cpu(qp->sq.db_data.data.value);
3480 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3481 
3482 		/* Restore prev_wqe_size */
3483 		qp->prev_wqe_size = wqe->prev_wqe_size;
3484 		rc = -EINVAL;
3485 		DP_ERR(dev, "POST SEND FAILED\n");
3486 	}
3487 
3488 	return rc;
3489 }
3490 
3491 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3492 		   const struct ib_send_wr **bad_wr)
3493 {
3494 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3495 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3496 	unsigned long flags;
3497 	int rc = 0;
3498 
3499 	*bad_wr = NULL;
3500 
3501 	if (qp->qp_type == IB_QPT_GSI)
3502 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3503 
3504 	spin_lock_irqsave(&qp->q_lock, flags);
3505 
3506 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3507 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3508 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3509 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3510 			spin_unlock_irqrestore(&qp->q_lock, flags);
3511 			*bad_wr = wr;
3512 			DP_DEBUG(dev, QEDR_MSG_CQ,
3513 				 "QP in wrong state! QP icid=0x%x state %d\n",
3514 				 qp->icid, qp->state);
3515 			return -EINVAL;
3516 		}
3517 	}
3518 
3519 	while (wr) {
3520 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3521 		if (rc)
3522 			break;
3523 
3524 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3525 
3526 		qedr_inc_sw_prod(&qp->sq);
3527 
3528 		qp->sq.db_data.data.value++;
3529 
3530 		wr = wr->next;
3531 	}
3532 
3533 	/* Trigger doorbell
3534 	 * If there was a failure in the first WR then it will be triggered in
3535 	 * vane. However this is not harmful (as long as the producer value is
3536 	 * unchanged). For performance reasons we avoid checking for this
3537 	 * redundant doorbell.
3538 	 *
3539 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3540 	 * soon as we give the doorbell, we could get a completion
3541 	 * for this wr, therefore we need to make sure that the
3542 	 * memory is updated before giving the doorbell.
3543 	 * During qedr_poll_cq, rmb is called before accessing the
3544 	 * cqe. This covers for the smp_rmb as well.
3545 	 */
3546 	smp_wmb();
3547 	writel(qp->sq.db_data.raw, qp->sq.db);
3548 
3549 	/* Make sure write sticks */
3550 	mmiowb();
3551 
3552 	spin_unlock_irqrestore(&qp->q_lock, flags);
3553 
3554 	return rc;
3555 }
3556 
3557 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3558 {
3559 	u32 used;
3560 
3561 	/* Calculate number of elements used based on producer
3562 	 * count and consumer count and subtract it from max
3563 	 * work request supported so that we get elements left.
3564 	 */
3565 	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3566 
3567 	return hw_srq->max_wr - used;
3568 }
3569 
3570 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3571 		       const struct ib_recv_wr **bad_wr)
3572 {
3573 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3574 	struct qedr_srq_hwq_info *hw_srq;
3575 	struct qedr_dev *dev = srq->dev;
3576 	struct qed_chain *pbl;
3577 	unsigned long flags;
3578 	int status = 0;
3579 	u32 num_sge;
3580 	u32 offset;
3581 
3582 	spin_lock_irqsave(&srq->lock, flags);
3583 
3584 	hw_srq = &srq->hw_srq;
3585 	pbl = &srq->hw_srq.pbl;
3586 	while (wr) {
3587 		struct rdma_srq_wqe_header *hdr;
3588 		int i;
3589 
3590 		if (!qedr_srq_elem_left(hw_srq) ||
3591 		    wr->num_sge > srq->hw_srq.max_sges) {
3592 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3593 			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3594 			       wr->num_sge, srq->hw_srq.max_sges);
3595 			status = -ENOMEM;
3596 			*bad_wr = wr;
3597 			break;
3598 		}
3599 
3600 		hdr = qed_chain_produce(pbl);
3601 		num_sge = wr->num_sge;
3602 		/* Set number of sge and work request id in header */
3603 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3604 
3605 		srq->hw_srq.wr_prod_cnt++;
3606 		hw_srq->wqe_prod++;
3607 		hw_srq->sge_prod++;
3608 
3609 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3610 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3611 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3612 
3613 		for (i = 0; i < wr->num_sge; i++) {
3614 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3615 
3616 			/* Set SGE length, lkey and address */
3617 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3618 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3619 
3620 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3621 				 "[%d]: len %d key %x addr %x:%x\n",
3622 				 i, srq_sge->length, srq_sge->l_key,
3623 				 srq_sge->addr.hi, srq_sge->addr.lo);
3624 			hw_srq->sge_prod++;
3625 		}
3626 
3627 		/* Flush WQE and SGE information before
3628 		 * updating producer.
3629 		 */
3630 		wmb();
3631 
3632 		/* SRQ producer is 8 bytes. Need to update SGE producer index
3633 		 * in first 4 bytes and need to update WQE producer in
3634 		 * next 4 bytes.
3635 		 */
3636 		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3637 		offset = offsetof(struct rdma_srq_producers, wqe_prod);
3638 		*((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
3639 			hw_srq->wqe_prod;
3640 
3641 		/* Flush producer after updating it. */
3642 		wmb();
3643 		wr = wr->next;
3644 	}
3645 
3646 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3647 		 qed_chain_get_elem_left(pbl));
3648 	spin_unlock_irqrestore(&srq->lock, flags);
3649 
3650 	return status;
3651 }
3652 
3653 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3654 		   const struct ib_recv_wr **bad_wr)
3655 {
3656 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3657 	struct qedr_dev *dev = qp->dev;
3658 	unsigned long flags;
3659 	int status = 0;
3660 
3661 	if (qp->qp_type == IB_QPT_GSI)
3662 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3663 
3664 	spin_lock_irqsave(&qp->q_lock, flags);
3665 
3666 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3667 		spin_unlock_irqrestore(&qp->q_lock, flags);
3668 		*bad_wr = wr;
3669 		return -EINVAL;
3670 	}
3671 
3672 	while (wr) {
3673 		int i;
3674 
3675 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3676 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3677 		    wr->num_sge > qp->rq.max_sges) {
3678 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3679 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3680 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3681 			       qp->rq.max_sges);
3682 			status = -ENOMEM;
3683 			*bad_wr = wr;
3684 			break;
3685 		}
3686 		for (i = 0; i < wr->num_sge; i++) {
3687 			u32 flags = 0;
3688 			struct rdma_rq_sge *rqe =
3689 			    qed_chain_produce(&qp->rq.pbl);
3690 
3691 			/* First one must include the number
3692 			 * of SGE in the list
3693 			 */
3694 			if (!i)
3695 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3696 					  wr->num_sge);
3697 
3698 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3699 				  wr->sg_list[i].lkey);
3700 
3701 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3702 				   wr->sg_list[i].length, flags);
3703 		}
3704 
3705 		/* Special case of no sges. FW requires between 1-4 sges...
3706 		 * in this case we need to post 1 sge with length zero. this is
3707 		 * because rdma write with immediate consumes an RQ.
3708 		 */
3709 		if (!wr->num_sge) {
3710 			u32 flags = 0;
3711 			struct rdma_rq_sge *rqe =
3712 			    qed_chain_produce(&qp->rq.pbl);
3713 
3714 			/* First one must include the number
3715 			 * of SGE in the list
3716 			 */
3717 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3718 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3719 
3720 			RQ_SGE_SET(rqe, 0, 0, flags);
3721 			i = 1;
3722 		}
3723 
3724 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3725 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3726 
3727 		qedr_inc_sw_prod(&qp->rq);
3728 
3729 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3730 		 * soon as we give the doorbell, we could get a completion
3731 		 * for this wr, therefore we need to make sure that the
3732 		 * memory is update before giving the doorbell.
3733 		 * During qedr_poll_cq, rmb is called before accessing the
3734 		 * cqe. This covers for the smp_rmb as well.
3735 		 */
3736 		smp_wmb();
3737 
3738 		qp->rq.db_data.data.value++;
3739 
3740 		writel(qp->rq.db_data.raw, qp->rq.db);
3741 
3742 		/* Make sure write sticks */
3743 		mmiowb();
3744 
3745 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3746 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3747 			mmiowb();	/* for second doorbell */
3748 		}
3749 
3750 		wr = wr->next;
3751 	}
3752 
3753 	spin_unlock_irqrestore(&qp->q_lock, flags);
3754 
3755 	return status;
3756 }
3757 
3758 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3759 {
3760 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3761 
3762 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3763 		cq->pbl_toggle;
3764 }
3765 
3766 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3767 {
3768 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3769 	struct qedr_qp *qp;
3770 
3771 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3772 						   resp_cqe->qp_handle.lo,
3773 						   u64);
3774 	return qp;
3775 }
3776 
3777 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3778 {
3779 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3780 
3781 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3782 }
3783 
3784 /* Return latest CQE (needs processing) */
3785 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3786 {
3787 	return cq->latest_cqe;
3788 }
3789 
3790 /* In fmr we need to increase the number of fmr completed counter for the fmr
3791  * algorithm determining whether we can free a pbl or not.
3792  * we need to perform this whether the work request was signaled or not. for
3793  * this purpose we call this function from the condition that checks if a wr
3794  * should be skipped, to make sure we don't miss it ( possibly this fmr
3795  * operation was not signalted)
3796  */
3797 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3798 {
3799 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3800 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3801 }
3802 
3803 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3804 		       struct qedr_cq *cq, int num_entries,
3805 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3806 		       int force)
3807 {
3808 	u16 cnt = 0;
3809 
3810 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3811 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3812 			qedr_chk_if_fmr(qp);
3813 			/* skip WC */
3814 			goto next_cqe;
3815 		}
3816 
3817 		/* fill WC */
3818 		wc->status = status;
3819 		wc->vendor_err = 0;
3820 		wc->wc_flags = 0;
3821 		wc->src_qp = qp->id;
3822 		wc->qp = &qp->ibqp;
3823 
3824 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3825 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3826 
3827 		switch (wc->opcode) {
3828 		case IB_WC_RDMA_WRITE:
3829 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3830 			break;
3831 		case IB_WC_COMP_SWAP:
3832 		case IB_WC_FETCH_ADD:
3833 			wc->byte_len = 8;
3834 			break;
3835 		case IB_WC_REG_MR:
3836 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3837 			break;
3838 		case IB_WC_RDMA_READ:
3839 		case IB_WC_SEND:
3840 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3841 			break;
3842 		default:
3843 			break;
3844 		}
3845 
3846 		num_entries--;
3847 		wc++;
3848 		cnt++;
3849 next_cqe:
3850 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3851 			qed_chain_consume(&qp->sq.pbl);
3852 		qedr_inc_sw_cons(&qp->sq);
3853 	}
3854 
3855 	return cnt;
3856 }
3857 
3858 static int qedr_poll_cq_req(struct qedr_dev *dev,
3859 			    struct qedr_qp *qp, struct qedr_cq *cq,
3860 			    int num_entries, struct ib_wc *wc,
3861 			    struct rdma_cqe_requester *req)
3862 {
3863 	int cnt = 0;
3864 
3865 	switch (req->status) {
3866 	case RDMA_CQE_REQ_STS_OK:
3867 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3868 				  IB_WC_SUCCESS, 0);
3869 		break;
3870 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3871 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3872 			DP_DEBUG(dev, QEDR_MSG_CQ,
3873 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3874 				 cq->icid, qp->icid);
3875 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3876 				  IB_WC_WR_FLUSH_ERR, 1);
3877 		break;
3878 	default:
3879 		/* process all WQE before the cosumer */
3880 		qp->state = QED_ROCE_QP_STATE_ERR;
3881 		cnt = process_req(dev, qp, cq, num_entries, wc,
3882 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3883 		wc += cnt;
3884 		/* if we have extra WC fill it with actual error info */
3885 		if (cnt < num_entries) {
3886 			enum ib_wc_status wc_status;
3887 
3888 			switch (req->status) {
3889 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3890 				DP_ERR(dev,
3891 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3892 				       cq->icid, qp->icid);
3893 				wc_status = IB_WC_BAD_RESP_ERR;
3894 				break;
3895 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3896 				DP_ERR(dev,
3897 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3898 				       cq->icid, qp->icid);
3899 				wc_status = IB_WC_LOC_LEN_ERR;
3900 				break;
3901 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3902 				DP_ERR(dev,
3903 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3904 				       cq->icid, qp->icid);
3905 				wc_status = IB_WC_LOC_QP_OP_ERR;
3906 				break;
3907 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3908 				DP_ERR(dev,
3909 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3910 				       cq->icid, qp->icid);
3911 				wc_status = IB_WC_LOC_PROT_ERR;
3912 				break;
3913 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3914 				DP_ERR(dev,
3915 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3916 				       cq->icid, qp->icid);
3917 				wc_status = IB_WC_MW_BIND_ERR;
3918 				break;
3919 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3920 				DP_ERR(dev,
3921 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3922 				       cq->icid, qp->icid);
3923 				wc_status = IB_WC_REM_INV_REQ_ERR;
3924 				break;
3925 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3926 				DP_ERR(dev,
3927 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3928 				       cq->icid, qp->icid);
3929 				wc_status = IB_WC_REM_ACCESS_ERR;
3930 				break;
3931 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3932 				DP_ERR(dev,
3933 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3934 				       cq->icid, qp->icid);
3935 				wc_status = IB_WC_REM_OP_ERR;
3936 				break;
3937 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3938 				DP_ERR(dev,
3939 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3940 				       cq->icid, qp->icid);
3941 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3942 				break;
3943 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3944 				DP_ERR(dev,
3945 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3946 				       cq->icid, qp->icid);
3947 				wc_status = IB_WC_RETRY_EXC_ERR;
3948 				break;
3949 			default:
3950 				DP_ERR(dev,
3951 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3952 				       cq->icid, qp->icid);
3953 				wc_status = IB_WC_GENERAL_ERR;
3954 			}
3955 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3956 					   wc_status, 1);
3957 		}
3958 	}
3959 
3960 	return cnt;
3961 }
3962 
3963 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3964 {
3965 	switch (status) {
3966 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3967 		return IB_WC_LOC_ACCESS_ERR;
3968 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3969 		return IB_WC_LOC_LEN_ERR;
3970 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3971 		return IB_WC_LOC_QP_OP_ERR;
3972 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3973 		return IB_WC_LOC_PROT_ERR;
3974 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3975 		return IB_WC_MW_BIND_ERR;
3976 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3977 		return IB_WC_REM_INV_RD_REQ_ERR;
3978 	case RDMA_CQE_RESP_STS_OK:
3979 		return IB_WC_SUCCESS;
3980 	default:
3981 		return IB_WC_GENERAL_ERR;
3982 	}
3983 }
3984 
3985 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3986 					  struct ib_wc *wc)
3987 {
3988 	wc->status = IB_WC_SUCCESS;
3989 	wc->byte_len = le32_to_cpu(resp->length);
3990 
3991 	if (resp->flags & QEDR_RESP_IMM) {
3992 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3993 		wc->wc_flags |= IB_WC_WITH_IMM;
3994 
3995 		if (resp->flags & QEDR_RESP_RDMA)
3996 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3997 
3998 		if (resp->flags & QEDR_RESP_INV)
3999 			return -EINVAL;
4000 
4001 	} else if (resp->flags & QEDR_RESP_INV) {
4002 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4003 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4004 
4005 		if (resp->flags & QEDR_RESP_RDMA)
4006 			return -EINVAL;
4007 
4008 	} else if (resp->flags & QEDR_RESP_RDMA) {
4009 		return -EINVAL;
4010 	}
4011 
4012 	return 0;
4013 }
4014 
4015 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4016 			       struct qedr_cq *cq, struct ib_wc *wc,
4017 			       struct rdma_cqe_responder *resp, u64 wr_id)
4018 {
4019 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4020 	wc->opcode = IB_WC_RECV;
4021 	wc->wc_flags = 0;
4022 
4023 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4024 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4025 			DP_ERR(dev,
4026 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4027 			       cq, cq->icid, resp->flags);
4028 
4029 	} else {
4030 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4031 		if (wc->status == IB_WC_GENERAL_ERR)
4032 			DP_ERR(dev,
4033 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4034 			       cq, cq->icid, resp->status);
4035 	}
4036 
4037 	/* Fill the rest of the WC */
4038 	wc->vendor_err = 0;
4039 	wc->src_qp = qp->id;
4040 	wc->qp = &qp->ibqp;
4041 	wc->wr_id = wr_id;
4042 }
4043 
4044 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4045 				struct qedr_cq *cq, struct ib_wc *wc,
4046 				struct rdma_cqe_responder *resp)
4047 {
4048 	struct qedr_srq *srq = qp->srq;
4049 	u64 wr_id;
4050 
4051 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4052 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4053 
4054 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4055 		wc->status = IB_WC_WR_FLUSH_ERR;
4056 		wc->vendor_err = 0;
4057 		wc->wr_id = wr_id;
4058 		wc->byte_len = 0;
4059 		wc->src_qp = qp->id;
4060 		wc->qp = &qp->ibqp;
4061 		wc->wr_id = wr_id;
4062 	} else {
4063 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4064 	}
4065 	srq->hw_srq.wr_cons_cnt++;
4066 
4067 	return 1;
4068 }
4069 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4070 			    struct qedr_cq *cq, struct ib_wc *wc,
4071 			    struct rdma_cqe_responder *resp)
4072 {
4073 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4074 
4075 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4076 
4077 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4078 		qed_chain_consume(&qp->rq.pbl);
4079 	qedr_inc_sw_cons(&qp->rq);
4080 
4081 	return 1;
4082 }
4083 
4084 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4085 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4086 {
4087 	u16 cnt = 0;
4088 
4089 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4090 		/* fill WC */
4091 		wc->status = IB_WC_WR_FLUSH_ERR;
4092 		wc->vendor_err = 0;
4093 		wc->wc_flags = 0;
4094 		wc->src_qp = qp->id;
4095 		wc->byte_len = 0;
4096 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4097 		wc->qp = &qp->ibqp;
4098 		num_entries--;
4099 		wc++;
4100 		cnt++;
4101 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4102 			qed_chain_consume(&qp->rq.pbl);
4103 		qedr_inc_sw_cons(&qp->rq);
4104 	}
4105 
4106 	return cnt;
4107 }
4108 
4109 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4110 				 struct rdma_cqe_responder *resp, int *update)
4111 {
4112 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4113 		consume_cqe(cq);
4114 		*update |= 1;
4115 	}
4116 }
4117 
4118 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4119 				 struct qedr_cq *cq, int num_entries,
4120 				 struct ib_wc *wc,
4121 				 struct rdma_cqe_responder *resp)
4122 {
4123 	int cnt;
4124 
4125 	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4126 	consume_cqe(cq);
4127 
4128 	return cnt;
4129 }
4130 
4131 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4132 			     struct qedr_cq *cq, int num_entries,
4133 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4134 			     int *update)
4135 {
4136 	int cnt;
4137 
4138 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4139 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4140 					 resp->rq_cons_or_srq_id);
4141 		try_consume_resp_cqe(cq, qp, resp, update);
4142 	} else {
4143 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4144 		consume_cqe(cq);
4145 		*update |= 1;
4146 	}
4147 
4148 	return cnt;
4149 }
4150 
4151 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4152 				struct rdma_cqe_requester *req, int *update)
4153 {
4154 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4155 		consume_cqe(cq);
4156 		*update |= 1;
4157 	}
4158 }
4159 
4160 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4161 {
4162 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4163 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4164 	union rdma_cqe *cqe;
4165 	u32 old_cons, new_cons;
4166 	unsigned long flags;
4167 	int update = 0;
4168 	int done = 0;
4169 
4170 	if (cq->destroyed) {
4171 		DP_ERR(dev,
4172 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4173 		       cq, cq->icid);
4174 		return 0;
4175 	}
4176 
4177 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4178 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4179 
4180 	spin_lock_irqsave(&cq->cq_lock, flags);
4181 	cqe = cq->latest_cqe;
4182 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4183 	while (num_entries && is_valid_cqe(cq, cqe)) {
4184 		struct qedr_qp *qp;
4185 		int cnt = 0;
4186 
4187 		/* prevent speculative reads of any field of CQE */
4188 		rmb();
4189 
4190 		qp = cqe_get_qp(cqe);
4191 		if (!qp) {
4192 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4193 			break;
4194 		}
4195 
4196 		wc->qp = &qp->ibqp;
4197 
4198 		switch (cqe_get_type(cqe)) {
4199 		case RDMA_CQE_TYPE_REQUESTER:
4200 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4201 					       &cqe->req);
4202 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4203 			break;
4204 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4205 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4206 						&cqe->resp, &update);
4207 			break;
4208 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4209 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4210 						    wc, &cqe->resp);
4211 			update = 1;
4212 			break;
4213 		case RDMA_CQE_TYPE_INVALID:
4214 		default:
4215 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4216 			       cqe_get_type(cqe));
4217 		}
4218 		num_entries -= cnt;
4219 		wc += cnt;
4220 		done += cnt;
4221 
4222 		cqe = get_cqe(cq);
4223 	}
4224 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4225 
4226 	cq->cq_cons += new_cons - old_cons;
4227 
4228 	if (update)
4229 		/* doorbell notifies abount latest VALID entry,
4230 		 * but chain already point to the next INVALID one
4231 		 */
4232 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4233 
4234 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4235 	return done;
4236 }
4237 
4238 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4239 		     u8 port_num,
4240 		     const struct ib_wc *in_wc,
4241 		     const struct ib_grh *in_grh,
4242 		     const struct ib_mad_hdr *mad_hdr,
4243 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
4244 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4245 {
4246 	struct qedr_dev *dev = get_qedr_dev(ibdev);
4247 
4248 	DP_DEBUG(dev, QEDR_MSG_GSI,
4249 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
4250 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
4251 		 mad_hdr->class_specific, mad_hdr->class_version,
4252 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
4253 	return IB_MAD_RESULT_SUCCESS;
4254 }
4255