xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 89e33ea7)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_roce_cm.h"
53 
54 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
55 #define	RDMA_MAX_SGE_PER_SRQ	(4)
56 #define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
57 
58 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
59 
60 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
61 					size_t len)
62 {
63 	size_t min_len = min_t(size_t, len, udata->outlen);
64 
65 	return ib_copy_to_udata(udata, src, min_len);
66 }
67 
68 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
69 {
70 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
71 		return -EINVAL;
72 
73 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
74 	return 0;
75 }
76 
77 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
78 		      int index, union ib_gid *sgid)
79 {
80 	struct qedr_dev *dev = get_qedr_dev(ibdev);
81 
82 	memset(sgid->raw, 0, sizeof(sgid->raw));
83 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
84 
85 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
86 		 sgid->global.interface_id, sgid->global.subnet_prefix);
87 
88 	return 0;
89 }
90 
91 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
92 {
93 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
94 	struct qedr_device_attr *qattr = &dev->attr;
95 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
96 
97 	srq_attr->srq_limit = srq->srq_limit;
98 	srq_attr->max_wr = qattr->max_srq_wr;
99 	srq_attr->max_sge = qattr->max_sge;
100 
101 	return 0;
102 }
103 
104 int qedr_query_device(struct ib_device *ibdev,
105 		      struct ib_device_attr *attr, struct ib_udata *udata)
106 {
107 	struct qedr_dev *dev = get_qedr_dev(ibdev);
108 	struct qedr_device_attr *qattr = &dev->attr;
109 
110 	if (!dev->rdma_ctx) {
111 		DP_ERR(dev,
112 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
113 		       dev->rdma_ctx);
114 		return -EINVAL;
115 	}
116 
117 	memset(attr, 0, sizeof(*attr));
118 
119 	attr->fw_ver = qattr->fw_ver;
120 	attr->sys_image_guid = qattr->sys_image_guid;
121 	attr->max_mr_size = qattr->max_mr_size;
122 	attr->page_size_cap = qattr->page_size_caps;
123 	attr->vendor_id = qattr->vendor_id;
124 	attr->vendor_part_id = qattr->vendor_part_id;
125 	attr->hw_ver = qattr->hw_ver;
126 	attr->max_qp = qattr->max_qp;
127 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
128 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
129 	    IB_DEVICE_RC_RNR_NAK_GEN |
130 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
131 
132 	attr->max_send_sge = qattr->max_sge;
133 	attr->max_recv_sge = qattr->max_sge;
134 	attr->max_sge_rd = qattr->max_sge;
135 	attr->max_cq = qattr->max_cq;
136 	attr->max_cqe = qattr->max_cqe;
137 	attr->max_mr = qattr->max_mr;
138 	attr->max_mw = qattr->max_mw;
139 	attr->max_pd = qattr->max_pd;
140 	attr->atomic_cap = dev->atomic_cap;
141 	attr->max_fmr = qattr->max_fmr;
142 	attr->max_map_per_fmr = 16;
143 	attr->max_qp_init_rd_atom =
144 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
145 	attr->max_qp_rd_atom =
146 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
147 		attr->max_qp_init_rd_atom);
148 
149 	attr->max_srq = qattr->max_srq;
150 	attr->max_srq_sge = qattr->max_srq_sge;
151 	attr->max_srq_wr = qattr->max_srq_wr;
152 
153 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
154 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
155 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
156 	attr->max_ah = qattr->max_ah;
157 
158 	return 0;
159 }
160 
161 #define QEDR_SPEED_SDR		(1)
162 #define QEDR_SPEED_DDR		(2)
163 #define QEDR_SPEED_QDR		(4)
164 #define QEDR_SPEED_FDR10	(8)
165 #define QEDR_SPEED_FDR		(16)
166 #define QEDR_SPEED_EDR		(32)
167 
168 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
169 					    u8 *ib_width)
170 {
171 	switch (speed) {
172 	case 1000:
173 		*ib_speed = QEDR_SPEED_SDR;
174 		*ib_width = IB_WIDTH_1X;
175 		break;
176 	case 10000:
177 		*ib_speed = QEDR_SPEED_QDR;
178 		*ib_width = IB_WIDTH_1X;
179 		break;
180 
181 	case 20000:
182 		*ib_speed = QEDR_SPEED_DDR;
183 		*ib_width = IB_WIDTH_4X;
184 		break;
185 
186 	case 25000:
187 		*ib_speed = QEDR_SPEED_EDR;
188 		*ib_width = IB_WIDTH_1X;
189 		break;
190 
191 	case 40000:
192 		*ib_speed = QEDR_SPEED_QDR;
193 		*ib_width = IB_WIDTH_4X;
194 		break;
195 
196 	case 50000:
197 		*ib_speed = QEDR_SPEED_QDR;
198 		*ib_width = IB_WIDTH_4X;
199 		break;
200 
201 	case 100000:
202 		*ib_speed = QEDR_SPEED_EDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	default:
207 		/* Unsupported */
208 		*ib_speed = QEDR_SPEED_SDR;
209 		*ib_width = IB_WIDTH_1X;
210 	}
211 }
212 
213 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
214 {
215 	struct qedr_dev *dev;
216 	struct qed_rdma_port *rdma_port;
217 
218 	dev = get_qedr_dev(ibdev);
219 
220 	if (!dev->rdma_ctx) {
221 		DP_ERR(dev, "rdma_ctx is NULL\n");
222 		return -EINVAL;
223 	}
224 
225 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
226 
227 	/* *attr is zeroed by the caller, so avoid zeroing it here */
228 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
229 		attr->state = IB_PORT_ACTIVE;
230 		attr->phys_state = 5;
231 	} else {
232 		attr->state = IB_PORT_DOWN;
233 		attr->phys_state = 3;
234 	}
235 	attr->max_mtu = IB_MTU_4096;
236 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
237 	attr->lid = 0;
238 	attr->lmc = 0;
239 	attr->sm_lid = 0;
240 	attr->sm_sl = 0;
241 	attr->ip_gids = true;
242 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
243 		attr->gid_tbl_len = 1;
244 		attr->pkey_tbl_len = 1;
245 	} else {
246 		attr->gid_tbl_len = QEDR_MAX_SGID;
247 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
248 	}
249 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
250 	attr->qkey_viol_cntr = 0;
251 	get_link_speed_and_width(rdma_port->link_speed,
252 				 &attr->active_speed, &attr->active_width);
253 	attr->max_msg_sz = rdma_port->max_msg_size;
254 	attr->max_vl_num = 4;
255 
256 	return 0;
257 }
258 
259 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
260 		     struct ib_port_modify *props)
261 {
262 	return 0;
263 }
264 
265 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
266 			 unsigned long len)
267 {
268 	struct qedr_mm *mm;
269 
270 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
271 	if (!mm)
272 		return -ENOMEM;
273 
274 	mm->key.phy_addr = phy_addr;
275 	/* This function might be called with a length which is not a multiple
276 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
277 	 * forces this granularity by increasing the requested size if needed.
278 	 * When qedr_mmap is called, it will search the list with the updated
279 	 * length as a key. To prevent search failures, the length is rounded up
280 	 * in advance to PAGE_SIZE.
281 	 */
282 	mm->key.len = roundup(len, PAGE_SIZE);
283 	INIT_LIST_HEAD(&mm->entry);
284 
285 	mutex_lock(&uctx->mm_list_lock);
286 	list_add(&mm->entry, &uctx->mm_head);
287 	mutex_unlock(&uctx->mm_list_lock);
288 
289 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
290 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
291 		 (unsigned long long)mm->key.phy_addr,
292 		 (unsigned long)mm->key.len, uctx);
293 
294 	return 0;
295 }
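
/*
 * Illustrative sketch, not part of the original driver (the helper name is
 * made up): why qedr_add_mmap() stores roundup(len, PAGE_SIZE) rather than
 * the raw length.  If user space asks for a 100-byte doorbell mapping, the
 * kernel extends the VMA to a full page, so qedr_mmap() later searches the
 * list with len == 4096 (assuming 4K pages).  Rounding up front makes that
 * lookup hit.
 */
static inline bool qedr_sketch_mmap_key_matches(unsigned long requested_len,
						unsigned long vma_len)
{
	/* e.g. roundup(100, 4096) == 4096 == the page-granular VMA length */
	return roundup(requested_len, PAGE_SIZE) == vma_len;
}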
296 
297 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
298 			     unsigned long len)
299 {
300 	bool found = false;
301 	struct qedr_mm *mm;
302 
303 	mutex_lock(&uctx->mm_list_lock);
304 	list_for_each_entry(mm, &uctx->mm_head, entry) {
305 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
306 			continue;
307 
308 		found = true;
309 		break;
310 	}
311 	mutex_unlock(&uctx->mm_list_lock);
312 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
313 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
314 		 (unsigned long long)phy_addr, len, uctx, found);
315 
316 	return found;
317 }
318 
319 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
320 {
321 	struct ib_device *ibdev = uctx->device;
322 	int rc;
323 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
324 	struct qedr_alloc_ucontext_resp uresp = {};
325 	struct qedr_dev *dev = get_qedr_dev(ibdev);
326 	struct qed_rdma_add_user_out_params oparams;
327 
328 	if (!udata)
329 		return -EFAULT;
330 
331 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
332 	if (rc) {
333 		DP_ERR(dev,
334 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
335 		       rc);
336 		return rc;
337 	}
338 
339 	ctx->dpi = oparams.dpi;
340 	ctx->dpi_addr = oparams.dpi_addr;
341 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
342 	ctx->dpi_size = oparams.dpi_size;
343 	INIT_LIST_HEAD(&ctx->mm_head);
344 	mutex_init(&ctx->mm_list_lock);
345 
346 	uresp.dpm_enabled = dev->user_dpm_enabled;
347 	uresp.wids_enabled = 1;
348 	uresp.wid_count = oparams.wid_count;
349 	uresp.db_pa = ctx->dpi_phys_addr;
350 	uresp.db_size = ctx->dpi_size;
351 	uresp.max_send_wr = dev->attr.max_sqe;
352 	uresp.max_recv_wr = dev->attr.max_rqe;
353 	uresp.max_srq_wr = dev->attr.max_srq_wr;
354 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
355 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
356 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
357 	uresp.max_cqes = QEDR_MAX_CQES;
358 
359 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
360 	if (rc)
361 		return rc;
362 
363 	ctx->dev = dev;
364 
365 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
366 	if (rc)
367 		return rc;
368 
369 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
370 		 &ctx->ibucontext);
371 	return 0;
372 }
373 
374 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
375 {
376 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
377 	struct qedr_mm *mm, *tmp;
378 
379 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
380 		 uctx);
381 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
382 
383 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
384 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
385 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
386 			 mm->key.phy_addr, mm->key.len, uctx);
387 		list_del(&mm->entry);
388 		kfree(mm);
389 	}
390 }
391 
392 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
393 {
394 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
395 	struct qedr_dev *dev = get_qedr_dev(context->device);
396 	unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
397 	unsigned long len = (vma->vm_end - vma->vm_start);
398 	unsigned long dpi_start;
399 
400 	dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
401 
402 	DP_DEBUG(dev, QEDR_MSG_INIT,
403 		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK, vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
404 		 (void *)vma->vm_start, (void *)vma->vm_end,
405 		 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
406 
407 	if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
408 		DP_ERR(dev,
409 		       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
410 		       (void *)vma->vm_start, (void *)vma->vm_end);
411 		return -EINVAL;
412 	}
413 
414 	if (!qedr_search_mmap(ucontext, phys_addr, len)) {
415 		DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
416 		       vma->vm_pgoff);
417 		return -EINVAL;
418 	}
419 
420 	if (phys_addr < dpi_start ||
421 	    ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
422 		DP_ERR(dev,
423 		       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
424 		       (void *)phys_addr, (void *)dpi_start,
425 		       ucontext->dpi_size);
426 		return -EINVAL;
427 	}
428 
429 	if (vma->vm_flags & VM_READ) {
430 		DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
431 		return -EINVAL;
432 	}
433 
434 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
435 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
436 				  vma->vm_page_prot);
437 }
438 
439 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
440 		  struct ib_udata *udata)
441 {
442 	struct ib_device *ibdev = ibpd->device;
443 	struct qedr_dev *dev = get_qedr_dev(ibdev);
444 	struct qedr_pd *pd = get_qedr_pd(ibpd);
445 	u16 pd_id;
446 	int rc;
447 
448 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
449 		 (udata && context) ? "User Lib" : "Kernel");
450 
451 	if (!dev->rdma_ctx) {
452 		DP_ERR(dev, "invalid RDMA context\n");
453 		return -EINVAL;
454 	}
455 
456 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
457 	if (rc)
458 		return rc;
459 
460 	pd->pd_id = pd_id;
461 
462 	if (udata && context) {
463 		struct qedr_alloc_pd_uresp uresp = {
464 			.pd_id = pd_id,
465 		};
466 
467 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
468 		if (rc) {
469 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
470 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
471 			return rc;
472 		}
473 
474 		pd->uctx = get_qedr_ucontext(context);
475 		pd->uctx->pd = pd;
476 	}
477 
478 	return 0;
479 }
480 
481 void qedr_dealloc_pd(struct ib_pd *ibpd)
482 {
483 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
484 	struct qedr_pd *pd = get_qedr_pd(ibpd);
485 
486 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
487 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
488 }
489 
490 static void qedr_free_pbl(struct qedr_dev *dev,
491 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
492 {
493 	struct pci_dev *pdev = dev->pdev;
494 	int i;
495 
496 	for (i = 0; i < pbl_info->num_pbls; i++) {
497 		if (!pbl[i].va)
498 			continue;
499 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
500 				  pbl[i].va, pbl[i].pa);
501 	}
502 
503 	kfree(pbl);
504 }
505 
506 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
507 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
508 
509 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
510 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
511 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
512 
513 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
514 					   struct qedr_pbl_info *pbl_info,
515 					   gfp_t flags)
516 {
517 	struct pci_dev *pdev = dev->pdev;
518 	struct qedr_pbl *pbl_table;
519 	dma_addr_t *pbl_main_tbl;
520 	dma_addr_t pa;
521 	void *va;
522 	int i;
523 
524 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
525 	if (!pbl_table)
526 		return ERR_PTR(-ENOMEM);
527 
528 	for (i = 0; i < pbl_info->num_pbls; i++) {
529 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
530 					flags);
531 		if (!va)
532 			goto err;
533 
534 		pbl_table[i].va = va;
535 		pbl_table[i].pa = pa;
536 	}
537 
538 	/* Two-layer PBLs: if we have more than one pbl, we need to initialize
539 	 * the first one with physical pointers to all of the rest
540 	 */
541 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
542 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
543 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
544 
545 	return pbl_table;
546 
547 err:
548 	for (i--; i >= 0; i--)
549 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
550 				  pbl_table[i].va, pbl_table[i].pa);
551 
552 	qedr_free_pbl(dev, pbl_info, pbl_table);
553 
554 	return ERR_PTR(-ENOMEM);
555 }
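
/*
 * Illustrative sketch, not part of the original driver (helper name made up):
 * reading one PBE back through the two-layer table built above.  Layer 0
 * (pbl_table[0]) holds only the DMA addresses of the lower pages, while the
 * CPU-visible copies of those lower pages are pbl_table[1..num_pbls - 1],
 * which is also where qedr_populate_pbls() starts writing.  The PBE is
 * returned as a raw 64-bit value (the real entries are little-endian
 * lo/hi pairs).
 */
static inline u64 qedr_sketch_read_pbe(struct qedr_pbl *pbl_table,
				       struct qedr_pbl_info *pbl_info,
				       u32 pbe_idx)
{
	u32 pbes_per_page = pbl_info->pbl_size / sizeof(u64);
	u32 page = pbe_idx / pbes_per_page;	/* which lower-layer page */
	u32 slot = pbe_idx % pbes_per_page;	/* entry within that page */
	u64 *lower = pbl_table[1 + page].va;	/* layer 0 sits at index 0 */

	return lower[slot];
}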
556 
557 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
558 				struct qedr_pbl_info *pbl_info,
559 				u32 num_pbes, int two_layer_capable)
560 {
561 	u32 pbl_capacity;
562 	u32 pbl_size;
563 	u32 num_pbls;
564 
565 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
566 		if (num_pbes > MAX_PBES_TWO_LAYER) {
567 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
568 			       num_pbes);
569 			return -EINVAL;
570 		}
571 
572 		/* calculate required pbl page size */
573 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
574 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
575 			       NUM_PBES_ON_PAGE(pbl_size);
576 
577 		while (pbl_capacity < num_pbes) {
578 			pbl_size *= 2;
579 			pbl_capacity = pbl_size / sizeof(u64);
580 			pbl_capacity = pbl_capacity * pbl_capacity;
581 		}
582 
583 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
584 		num_pbls++;	/* One for layer 0 (it points to the pbls) */
585 		pbl_info->two_layered = true;
586 	} else {
587 		/* One layered PBL */
588 		num_pbls = 1;
589 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
590 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
591 		pbl_info->two_layered = false;
592 	}
593 
594 	pbl_info->num_pbls = num_pbls;
595 	pbl_info->pbl_size = pbl_size;
596 	pbl_info->num_pbes = num_pbes;
597 
598 	DP_DEBUG(dev, QEDR_MSG_MR,
599 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
600 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
601 
602 	return 0;
603 }
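
/*
 * Worked example (editorial, not from the original source): for a two-layer
 * capable mapping with num_pbes = 300000, the starting 4K page holds
 * 4096 / 8 = 512 PBEs, so two layers cover only 512 * 512 = 262144 entries
 * and pbl_size doubles to 8K (capacity 1024 * 1024 = 1048576).  That gives
 * DIV_ROUND_UP(300000, 1024) = 293 data PBLs plus one layer-0 page, i.e.
 * num_pbls = 294.  A small single-layer case such as num_pbes = 1000 instead
 * yields num_pbls = 1 and pbl_size = roundup_pow_of_two(1000 * 8) = 8192.
 */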
604 
605 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
606 			       struct qedr_pbl *pbl,
607 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
608 {
609 	int pbe_cnt, total_num_pbes = 0;
610 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
611 	struct qedr_pbl *pbl_tbl;
612 	struct sg_dma_page_iter sg_iter;
613 	struct regpair *pbe;
614 	u64 pg_addr;
615 
616 	if (!pbl_info->num_pbes)
617 		return;
618 
619 	/* If we have a two-layered pbl, the first pbl points to the rest
620 	 * of the pbls and the first entry lies on the second pbl in the table
621 	 */
622 	if (pbl_info->two_layered)
623 		pbl_tbl = &pbl[1];
624 	else
625 		pbl_tbl = pbl;
626 
627 	pbe = (struct regpair *)pbl_tbl->va;
628 	if (!pbe) {
629 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
630 		return;
631 	}
632 
633 	pbe_cnt = 0;
634 
635 	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
636 
637 	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
638 		pg_addr = sg_page_iter_dma_address(&sg_iter);
639 		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
640 			pbe->lo = cpu_to_le32(pg_addr);
641 			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
642 
643 			pg_addr += BIT(pg_shift);
644 			pbe_cnt++;
645 			total_num_pbes++;
646 			pbe++;
647 
648 			if (total_num_pbes == pbl_info->num_pbes)
649 				return;
650 
651 			/* If the given pbl is full of pbes,
652 			 * move to the next pbl.
653 			 */
654 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
655 				pbl_tbl++;
656 				pbe = (struct regpair *)pbl_tbl->va;
657 				pbe_cnt = 0;
658 			}
659 
660 			fw_pg_cnt++;
661 		}
662 	}
663 }
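
/*
 * Worked example (editorial, not from the original source): with the usual
 * 4K firmware page (pg_shift == 12) and 4K kernel pages, fw_pg_per_umem_pg
 * above is BIT(0) == 1, so exactly one PBE is written per DMA-mapped page.
 * On a 64K PAGE_SIZE kernel the same umem page is instead split into
 * BIT(16 - 12) == 16 firmware PBEs, each advanced by BIT(12) == 4096 bytes.
 */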
664 
665 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
666 			      struct qedr_cq *cq, struct ib_udata *udata)
667 {
668 	struct qedr_create_cq_uresp uresp;
669 	int rc;
670 
671 	memset(&uresp, 0, sizeof(uresp));
672 
673 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
674 	uresp.icid = cq->icid;
675 
676 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
677 	if (rc)
678 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
679 
680 	return rc;
681 }
682 
683 static void consume_cqe(struct qedr_cq *cq)
684 {
685 	if (cq->latest_cqe == cq->toggle_cqe)
686 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
687 
688 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
689 }
690 
691 static inline int qedr_align_cq_entries(int entries)
692 {
693 	u64 size, aligned_size;
694 
695 	/* We allocate an extra entry that we don't report to the FW. */
696 	size = (entries + 1) * QEDR_CQE_SIZE;
697 	aligned_size = ALIGN(size, PAGE_SIZE);
698 
699 	return aligned_size / QEDR_CQE_SIZE;
700 }
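
/*
 * Worked example (editorial, not from the original source): a request for
 * 256 CQEs becomes (256 + 1) * 32 = 8224 bytes (assuming a 32-byte CQE),
 * which ALIGN()s up to 12288 with 4K pages, so 384 entries are allocated.
 * The extra entry is the one "we don't report to the FW":
 * qedr_init_cq_params() later passes chain_entries - 1 as cq_size.
 */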
701 
702 static inline int qedr_init_user_queue(struct ib_udata *udata,
703 				       struct qedr_dev *dev,
704 				       struct qedr_userq *q, u64 buf_addr,
705 				       size_t buf_len, int access, int dmasync,
706 				       int alloc_and_init)
707 {
708 	u32 fw_pages;
709 	int rc;
710 
711 	q->buf_addr = buf_addr;
712 	q->buf_len = buf_len;
713 	q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync);
714 	if (IS_ERR(q->umem)) {
715 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
716 		       PTR_ERR(q->umem));
717 		return PTR_ERR(q->umem);
718 	}
719 
720 	fw_pages = ib_umem_page_count(q->umem) <<
721 	    (PAGE_SHIFT - FW_PAGE_SHIFT);
722 
723 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
724 	if (rc)
725 		goto err0;
726 
727 	if (alloc_and_init) {
728 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
729 		if (IS_ERR(q->pbl_tbl)) {
730 			rc = PTR_ERR(q->pbl_tbl);
731 			goto err0;
732 		}
733 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
734 				   FW_PAGE_SHIFT);
735 	} else {
736 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
737 		if (!q->pbl_tbl) {
738 			rc = -ENOMEM;
739 			goto err0;
740 		}
741 	}
742 
743 	return 0;
744 
745 err0:
746 	ib_umem_release(q->umem);
747 	q->umem = NULL;
748 
749 	return rc;
750 }
751 
752 static inline void qedr_init_cq_params(struct qedr_cq *cq,
753 				       struct qedr_ucontext *ctx,
754 				       struct qedr_dev *dev, int vector,
755 				       int chain_entries, int page_cnt,
756 				       u64 pbl_ptr,
757 				       struct qed_rdma_create_cq_in_params
758 				       *params)
759 {
760 	memset(params, 0, sizeof(*params));
761 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
762 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
763 	params->cnq_id = vector;
764 	params->cq_size = chain_entries - 1;
765 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
766 	params->pbl_num_pages = page_cnt;
767 	params->pbl_ptr = pbl_ptr;
768 	params->pbl_two_level = 0;
769 }
770 
771 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
772 {
773 	cq->db.data.agg_flags = flags;
774 	cq->db.data.value = cpu_to_le32(cons);
775 	writeq(cq->db.raw, cq->db_addr);
776 
777 	/* Make sure write would stick */
778 	mmiowb();
779 }
780 
781 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
782 {
783 	struct qedr_cq *cq = get_qedr_cq(ibcq);
784 	unsigned long sflags;
785 	struct qedr_dev *dev;
786 
787 	dev = get_qedr_dev(ibcq->device);
788 
789 	if (cq->destroyed) {
790 		DP_ERR(dev,
791 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
792 		       cq, cq->icid);
793 		return -EINVAL;
794 	}
795 
796 
797 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
798 		return 0;
799 
800 	spin_lock_irqsave(&cq->cq_lock, sflags);
801 
802 	cq->arm_flags = 0;
803 
804 	if (flags & IB_CQ_SOLICITED)
805 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
806 
807 	if (flags & IB_CQ_NEXT_COMP)
808 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
809 
810 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
811 
812 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
813 
814 	return 0;
815 }
816 
817 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
818 			     const struct ib_cq_init_attr *attr,
819 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
820 {
821 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
822 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
823 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
824 	struct qedr_dev *dev = get_qedr_dev(ibdev);
825 	struct qed_rdma_create_cq_in_params params;
826 	struct qedr_create_cq_ureq ureq;
827 	int vector = attr->comp_vector;
828 	int entries = attr->cqe;
829 	struct qedr_cq *cq;
830 	int chain_entries;
831 	int page_cnt;
832 	u64 pbl_ptr;
833 	u16 icid;
834 	int rc;
835 
836 	DP_DEBUG(dev, QEDR_MSG_INIT,
837 		 "create_cq: called from %s. entries=%d, vector=%d\n",
838 		 udata ? "User Lib" : "Kernel", entries, vector);
839 
840 	if (entries > QEDR_MAX_CQES) {
841 		DP_ERR(dev,
842 		       "create cq: the number of entries %d is too high. It must be equal to or below %d.\n",
843 		       entries, QEDR_MAX_CQES);
844 		return ERR_PTR(-EINVAL);
845 	}
846 
847 	chain_entries = qedr_align_cq_entries(entries);
848 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
849 
850 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
851 	if (!cq)
852 		return ERR_PTR(-ENOMEM);
853 
854 	if (udata) {
855 		memset(&ureq, 0, sizeof(ureq));
856 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
857 			DP_ERR(dev,
858 			       "create cq: problem copying data from user space\n");
859 			goto err0;
860 		}
861 
862 		if (!ureq.len) {
863 			DP_ERR(dev,
864 			       "create cq: cannot create a cq with 0 entries\n");
865 			goto err0;
866 		}
867 
868 		cq->cq_type = QEDR_CQ_TYPE_USER;
869 
870 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
871 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1,
872 					  1);
873 		if (rc)
874 			goto err0;
875 
876 		pbl_ptr = cq->q.pbl_tbl->pa;
877 		page_cnt = cq->q.pbl_info.num_pbes;
878 
879 		cq->ibcq.cqe = chain_entries;
880 	} else {
881 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
882 
883 		rc = dev->ops->common->chain_alloc(dev->cdev,
884 						   QED_CHAIN_USE_TO_CONSUME,
885 						   QED_CHAIN_MODE_PBL,
886 						   QED_CHAIN_CNT_TYPE_U32,
887 						   chain_entries,
888 						   sizeof(union rdma_cqe),
889 						   &cq->pbl, NULL);
890 		if (rc)
891 			goto err1;
892 
893 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
894 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
895 		cq->ibcq.cqe = cq->pbl.capacity;
896 	}
897 
898 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
899 			    pbl_ptr, &params);
900 
901 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
902 	if (rc)
903 		goto err2;
904 
905 	cq->icid = icid;
906 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
907 	spin_lock_init(&cq->cq_lock);
908 
909 	if (ib_ctx) {
910 		rc = qedr_copy_cq_uresp(dev, cq, udata);
911 		if (rc)
912 			goto err3;
913 	} else {
914 		/* Generate doorbell address. */
915 		cq->db_addr = dev->db_addr +
916 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
917 		cq->db.data.icid = cq->icid;
918 		cq->db.data.params = DB_AGG_CMD_SET <<
919 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
920 
921 		/* Point to the very last element; once we pass it, we toggle. */
922 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
923 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
924 		cq->latest_cqe = NULL;
925 		consume_cqe(cq);
926 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
927 	}
928 
929 	DP_DEBUG(dev, QEDR_MSG_CQ,
930 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
931 		 cq->icid, cq, params.cq_size);
932 
933 	return &cq->ibcq;
934 
935 err3:
936 	destroy_iparams.icid = cq->icid;
937 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
938 				  &destroy_oparams);
939 err2:
940 	if (udata)
941 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
942 	else
943 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
944 err1:
945 	if (udata)
946 		ib_umem_release(cq->q.umem);
947 err0:
948 	kfree(cq);
949 	return ERR_PTR(-EINVAL);
950 }
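
/*
 * Illustrative sketch, not part of the original driver (helper name made up):
 * how the toggle state initialised above is typically used when polling.
 * The firmware flips a toggle flag in each CQE every time it wraps the ring,
 * so a CQE belongs to the current pass only when its flag matches
 * cq->pbl_toggle, which consume_cqe() flips whenever the driver consumes
 * toggle_cqe (the very last element).
 */
static inline bool qedr_sketch_cqe_is_valid(struct qedr_cq *cq,
					    union rdma_cqe *cqe)
{
	return (cqe->req.flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
	       cq->pbl_toggle;
}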
951 
952 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
953 {
954 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
955 	struct qedr_cq *cq = get_qedr_cq(ibcq);
956 
957 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
958 
959 	return 0;
960 }
961 
962 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
963 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
964 
965 int qedr_destroy_cq(struct ib_cq *ibcq)
966 {
967 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
968 	struct qed_rdma_destroy_cq_out_params oparams;
969 	struct qed_rdma_destroy_cq_in_params iparams;
970 	struct qedr_cq *cq = get_qedr_cq(ibcq);
971 	int iter;
972 	int rc;
973 
974 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
975 
976 	cq->destroyed = 1;
977 
978 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
979 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
980 		goto done;
981 
982 	iparams.icid = cq->icid;
983 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
984 	if (rc)
985 		return rc;
986 
987 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
988 
989 	if (ibcq->uobject && ibcq->uobject->context) {
990 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
991 		ib_umem_release(cq->q.umem);
992 	}
993 
994 	/* We don't want the IRQ handler to handle a non-existing CQ so we
995 	 * wait until all CNQ interrupts, if any, are received. This will always
996 	 * happen and will always happen very fast. If not, then a serious error
997 	 * has occurred. That is why we can use a long delay.
998 	 * We spin for a short time so we don't lose time on context switching
999 	 * in case all the completions are handled in that span. Otherwise
1000 	 * we sleep for a while and check again. Since the CNQ may be
1001 	 * associated with (only) the current CPU we use msleep to allow the
1002 	 * current CPU to be freed.
1003 	 * The CNQ notification is increased in qedr_irq_handler().
1004 	 */
1005 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1006 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1007 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1008 		iter--;
1009 	}
1010 
1011 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1012 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1013 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1014 		iter--;
1015 	}
1016 
1017 	if (oparams.num_cq_notif != cq->cnq_notif)
1018 		goto err;
1019 
1020 	/* Note that we don't need to have explicit code to wait for the
1021 	 * completion of the event handler because it is invoked from the EQ.
1022 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1023 	 * be certain that there's no event handler in process.
1024 	 */
1025 done:
1026 	cq->sig = ~cq->sig;
1027 
1028 	kfree(cq);
1029 
1030 	return 0;
1031 
1032 err:
1033 	DP_ERR(dev,
1034 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1035 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1036 
1037 	return -EINVAL;
1038 }
1039 
1040 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1041 					  struct ib_qp_attr *attr,
1042 					  int attr_mask,
1043 					  struct qed_rdma_modify_qp_in_params
1044 					  *qp_params)
1045 {
1046 	const struct ib_gid_attr *gid_attr;
1047 	enum rdma_network_type nw_type;
1048 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1049 	u32 ipv4_addr;
1050 	int i;
1051 
1052 	gid_attr = grh->sgid_attr;
1053 	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev);
1054 
1055 	nw_type = rdma_gid_attr_network_type(gid_attr);
1056 	switch (nw_type) {
1057 	case RDMA_NETWORK_IPV6:
1058 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1059 		       sizeof(qp_params->sgid));
1060 		memcpy(&qp_params->dgid.bytes[0],
1061 		       &grh->dgid,
1062 		       sizeof(qp_params->dgid));
1063 		qp_params->roce_mode = ROCE_V2_IPV6;
1064 		SET_FIELD(qp_params->modify_flags,
1065 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1066 		break;
1067 	case RDMA_NETWORK_IB:
1068 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1069 		       sizeof(qp_params->sgid));
1070 		memcpy(&qp_params->dgid.bytes[0],
1071 		       &grh->dgid,
1072 		       sizeof(qp_params->dgid));
1073 		qp_params->roce_mode = ROCE_V1;
1074 		break;
1075 	case RDMA_NETWORK_IPV4:
1076 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1077 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1078 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1079 		qp_params->sgid.ipv4_addr = ipv4_addr;
1080 		ipv4_addr =
1081 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1082 		qp_params->dgid.ipv4_addr = ipv4_addr;
1083 		SET_FIELD(qp_params->modify_flags,
1084 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1085 		qp_params->roce_mode = ROCE_V2_IPV4;
1086 		break;
1087 	}
1088 
1089 	for (i = 0; i < 4; i++) {
1090 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1091 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1092 	}
1093 
1094 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1095 		qp_params->vlan_id = 0;
1096 
1097 	return 0;
1098 }
1099 
1100 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1101 			       struct ib_qp_init_attr *attrs,
1102 			       struct ib_udata *udata)
1103 {
1104 	struct qedr_device_attr *qattr = &dev->attr;
1105 
1106 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1107 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1108 		DP_DEBUG(dev, QEDR_MSG_QP,
1109 			 "create qp: unsupported qp type=0x%x requested\n",
1110 			 attrs->qp_type);
1111 		return -EINVAL;
1112 	}
1113 
1114 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1115 		DP_ERR(dev,
1116 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1117 		       attrs->cap.max_send_wr, qattr->max_sqe);
1118 		return -EINVAL;
1119 	}
1120 
1121 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1122 		DP_ERR(dev,
1123 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1124 		       attrs->cap.max_inline_data, qattr->max_inline);
1125 		return -EINVAL;
1126 	}
1127 
1128 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1129 		DP_ERR(dev,
1130 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1131 		       attrs->cap.max_send_sge, qattr->max_sge);
1132 		return -EINVAL;
1133 	}
1134 
1135 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1136 		DP_ERR(dev,
1137 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1138 		       attrs->cap.max_recv_sge, qattr->max_sge);
1139 		return -EINVAL;
1140 	}
1141 
1142 	/* Unprivileged user space cannot create special QP */
1143 	if (udata && attrs->qp_type == IB_QPT_GSI) {
1144 		DP_ERR(dev,
1145 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1146 		       attrs->qp_type);
1147 		return -EINVAL;
1148 	}
1149 
1150 	return 0;
1151 }
1152 
1153 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1154 			       struct qedr_srq *srq, struct ib_udata *udata)
1155 {
1156 	struct qedr_create_srq_uresp uresp = {};
1157 	int rc;
1158 
1159 	uresp.srq_id = srq->srq_id;
1160 
1161 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1162 	if (rc)
1163 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1164 
1165 	return rc;
1166 }
1167 
1168 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1169 			       struct qedr_create_qp_uresp *uresp,
1170 			       struct qedr_qp *qp)
1171 {
1172 	/* iWARP requires two doorbells per RQ. */
1173 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1174 		uresp->rq_db_offset =
1175 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1176 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1177 	} else {
1178 		uresp->rq_db_offset =
1179 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1180 	}
1181 
1182 	uresp->rq_icid = qp->icid;
1183 }
1184 
1185 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1186 			       struct qedr_create_qp_uresp *uresp,
1187 			       struct qedr_qp *qp)
1188 {
1189 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1190 
1191 	/* iWARP uses the same cid for rq and sq */
1192 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1193 		uresp->sq_icid = qp->icid;
1194 	else
1195 		uresp->sq_icid = qp->icid + 1;
1196 }
1197 
1198 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1199 			      struct qedr_qp *qp, struct ib_udata *udata)
1200 {
1201 	struct qedr_create_qp_uresp uresp;
1202 	int rc;
1203 
1204 	memset(&uresp, 0, sizeof(uresp));
1205 	qedr_copy_sq_uresp(dev, &uresp, qp);
1206 	qedr_copy_rq_uresp(dev, &uresp, qp);
1207 
1208 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1209 	uresp.qp_id = qp->qp_id;
1210 
1211 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1212 	if (rc)
1213 		DP_ERR(dev,
1214 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1215 		       qp->icid);
1216 
1217 	return rc;
1218 }
1219 
1220 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1221 				      struct qedr_qp *qp,
1222 				      struct qedr_pd *pd,
1223 				      struct ib_qp_init_attr *attrs)
1224 {
1225 	spin_lock_init(&qp->q_lock);
1226 	atomic_set(&qp->refcnt, 1);
1227 	qp->pd = pd;
1228 	qp->qp_type = attrs->qp_type;
1229 	qp->max_inline_data = attrs->cap.max_inline_data;
1230 	qp->sq.max_sges = attrs->cap.max_send_sge;
1231 	qp->state = QED_ROCE_QP_STATE_RESET;
1232 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1233 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1234 	qp->dev = dev;
1235 
1236 	if (attrs->srq) {
1237 		qp->srq = get_qedr_srq(attrs->srq);
1238 	} else {
1239 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1240 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1241 		DP_DEBUG(dev, QEDR_MSG_QP,
1242 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1243 			 qp->rq.max_sges, qp->rq_cq->icid);
1244 	}
1245 
1246 	DP_DEBUG(dev, QEDR_MSG_QP,
1247 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1248 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1249 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1250 	DP_DEBUG(dev, QEDR_MSG_QP,
1251 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1252 		 qp->sq.max_sges, qp->sq_cq->icid);
1253 }
1254 
1255 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1256 {
1257 	qp->sq.db = dev->db_addr +
1258 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1259 	qp->sq.db_data.data.icid = qp->icid + 1;
1260 	if (!qp->srq) {
1261 		qp->rq.db = dev->db_addr +
1262 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1263 		qp->rq.db_data.data.icid = qp->icid;
1264 	}
1265 }
1266 
1267 static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev,
1268 				 struct ib_srq_init_attr *attrs,
1269 				 struct ib_udata *udata)
1270 {
1271 	struct qedr_device_attr *qattr = &dev->attr;
1272 
1273 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1274 		DP_ERR(dev,
1275 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1276 		       attrs->attr.max_wr, qattr->max_srq_wr);
1277 		return -EINVAL;
1278 	}
1279 
1280 	if (attrs->attr.max_sge > qattr->max_sge) {
1281 		DP_ERR(dev,
1282 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1283 		       attrs->attr.max_sge, qattr->max_sge);
1284 		return -EINVAL;
1285 	}
1286 
1287 	return 0;
1288 }
1289 
1290 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1291 {
1292 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1293 	ib_umem_release(srq->usrq.umem);
1294 	ib_umem_release(srq->prod_umem);
1295 }
1296 
1297 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1298 {
1299 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1300 	struct qedr_dev *dev = srq->dev;
1301 
1302 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1303 
1304 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1305 			  hw_srq->virt_prod_pair_addr,
1306 			  hw_srq->phy_prod_pair_addr);
1307 }
1308 
1309 static int qedr_init_srq_user_params(struct ib_udata *udata,
1310 				     struct qedr_srq *srq,
1311 				     struct qedr_create_srq_ureq *ureq,
1312 				     int access, int dmasync)
1313 {
1314 	struct scatterlist *sg;
1315 	int rc;
1316 
1317 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1318 				  ureq->srq_len, access, dmasync, 1);
1319 	if (rc)
1320 		return rc;
1321 
1322 	srq->prod_umem =
1323 		ib_umem_get(udata, ureq->prod_pair_addr,
1324 			    sizeof(struct rdma_srq_producers), access, dmasync);
1325 	if (IS_ERR(srq->prod_umem)) {
1326 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1327 		ib_umem_release(srq->usrq.umem);
1328 		DP_ERR(srq->dev,
1329 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1330 		       PTR_ERR(srq->prod_umem));
1331 		return PTR_ERR(srq->prod_umem);
1332 	}
1333 
1334 	sg = srq->prod_umem->sg_head.sgl;
1335 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1336 
1337 	return 0;
1338 }
1339 
1340 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1341 					struct qedr_dev *dev,
1342 					struct ib_srq_init_attr *init_attr)
1343 {
1344 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1345 	dma_addr_t phy_prod_pair_addr;
1346 	u32 num_elems;
1347 	void *va;
1348 	int rc;
1349 
1350 	va = dma_alloc_coherent(&dev->pdev->dev,
1351 				sizeof(struct rdma_srq_producers),
1352 				&phy_prod_pair_addr, GFP_KERNEL);
1353 	if (!va) {
1354 		DP_ERR(dev,
1355 		       "create srq: failed to allocate dma memory for producer\n");
1356 		return -ENOMEM;
1357 	}
1358 
1359 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1360 	hw_srq->virt_prod_pair_addr = va;
1361 
1362 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1363 	rc = dev->ops->common->chain_alloc(dev->cdev,
1364 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1365 					   QED_CHAIN_MODE_PBL,
1366 					   QED_CHAIN_CNT_TYPE_U32,
1367 					   num_elems,
1368 					   QEDR_SRQ_WQE_ELEM_SIZE,
1369 					   &hw_srq->pbl, NULL);
1370 	if (rc)
1371 		goto err0;
1372 
1373 	hw_srq->num_elems = num_elems;
1374 
1375 	return 0;
1376 
1377 err0:
1378 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1379 			  va, phy_prod_pair_addr);
1380 	return rc;
1381 }
1382 
1383 static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
1384 			void *ptr, u32 id);
1385 static void qedr_idr_remove(struct qedr_dev *dev,
1386 			    struct qedr_idr *qidr, u32 id);
1387 
1388 struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
1389 			       struct ib_srq_init_attr *init_attr,
1390 			       struct ib_udata *udata)
1391 {
1392 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1393 	struct qed_rdma_create_srq_in_params in_params = {};
1394 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1395 	struct qed_rdma_create_srq_out_params out_params;
1396 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1397 	struct qedr_create_srq_ureq ureq = {};
1398 	u64 pbl_base_addr, phy_prod_pair_addr;
1399 	struct qedr_srq_hwq_info *hw_srq;
1400 	u32 page_cnt, page_size;
1401 	struct qedr_srq *srq;
1402 	int rc = 0;
1403 
1404 	DP_DEBUG(dev, QEDR_MSG_QP,
1405 		 "create SRQ called from %s (pd %p)\n",
1406 		 (udata) ? "User lib" : "kernel", pd);
1407 
1408 	rc = qedr_check_srq_params(ibpd, dev, init_attr, udata);
1409 	if (rc)
1410 		return ERR_PTR(-EINVAL);
1411 
1412 	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1413 	if (!srq)
1414 		return ERR_PTR(-ENOMEM);
1415 
1416 	srq->dev = dev;
1417 	hw_srq = &srq->hw_srq;
1418 	spin_lock_init(&srq->lock);
1419 
1420 	hw_srq->max_wr = init_attr->attr.max_wr;
1421 	hw_srq->max_sges = init_attr->attr.max_sge;
1422 
1423 	if (udata) {
1424 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
1425 			DP_ERR(dev,
1426 			       "create srq: problem copying data from user space\n");
1427 			goto err0;
1428 		}
1429 
1430 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0);
1431 		if (rc)
1432 			goto err0;
1433 
1434 		page_cnt = srq->usrq.pbl_info.num_pbes;
1435 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1436 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1437 		page_size = PAGE_SIZE;
1438 	} else {
1439 		struct qed_chain *pbl;
1440 
1441 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1442 		if (rc)
1443 			goto err0;
1444 
1445 		pbl = &hw_srq->pbl;
1446 		page_cnt = qed_chain_get_page_cnt(pbl);
1447 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1448 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1449 		page_size = QED_CHAIN_PAGE_SIZE;
1450 	}
1451 
1452 	in_params.pd_id = pd->pd_id;
1453 	in_params.pbl_base_addr = pbl_base_addr;
1454 	in_params.prod_pair_addr = phy_prod_pair_addr;
1455 	in_params.num_pages = page_cnt;
1456 	in_params.page_size = page_size;
1457 
1458 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1459 	if (rc)
1460 		goto err1;
1461 
1462 	srq->srq_id = out_params.srq_id;
1463 
1464 	if (udata) {
1465 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1466 		if (rc)
1467 			goto err2;
1468 	}
1469 
1470 	rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id);
1471 	if (rc)
1472 		goto err2;
1473 
1474 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1475 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1476 	return &srq->ibsrq;
1477 
1478 err2:
1479 	destroy_in_params.srq_id = srq->srq_id;
1480 
1481 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1482 err1:
1483 	if (udata)
1484 		qedr_free_srq_user_params(srq);
1485 	else
1486 		qedr_free_srq_kernel_params(srq);
1487 err0:
1488 	kfree(srq);
1489 
1490 	return ERR_PTR(-EFAULT);
1491 }
1492 
1493 int qedr_destroy_srq(struct ib_srq *ibsrq)
1494 {
1495 	struct qed_rdma_destroy_srq_in_params in_params = {};
1496 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1497 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1498 
1499 	qedr_idr_remove(dev, &dev->srqidr, srq->srq_id);
1500 	in_params.srq_id = srq->srq_id;
1501 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1502 
1503 	if (ibsrq->uobject)
1504 		qedr_free_srq_user_params(srq);
1505 	else
1506 		qedr_free_srq_kernel_params(srq);
1507 
1508 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1509 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1510 		 srq->srq_id);
1511 	kfree(srq);
1512 
1513 	return 0;
1514 }
1515 
1516 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1517 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1518 {
1519 	struct qed_rdma_modify_srq_in_params in_params = {};
1520 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1521 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1522 	int rc;
1523 
1524 	if (attr_mask & IB_SRQ_MAX_WR) {
1525 		DP_ERR(dev,
1526 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1527 		       attr_mask, srq);
1528 		return -EINVAL;
1529 	}
1530 
1531 	if (attr_mask & IB_SRQ_LIMIT) {
1532 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1533 			DP_ERR(dev,
1534 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1535 			       attr->srq_limit, srq->hw_srq.max_wr);
1536 			return -EINVAL;
1537 		}
1538 
1539 		in_params.srq_id = srq->srq_id;
1540 		in_params.wqe_limit = attr->srq_limit;
1541 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1542 		if (rc)
1543 			return rc;
1544 	}
1545 
1546 	srq->srq_limit = attr->srq_limit;
1547 
1548 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1549 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1550 
1551 	return 0;
1552 }
1553 
1554 static inline void
1555 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1556 			      struct qedr_pd *pd,
1557 			      struct qedr_qp *qp,
1558 			      struct ib_qp_init_attr *attrs,
1559 			      bool fmr_and_reserved_lkey,
1560 			      struct qed_rdma_create_qp_in_params *params)
1561 {
1562 	/* QP handle to be written in an async event */
1563 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1564 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1565 
1566 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1567 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1568 	params->pd = pd->pd_id;
1569 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1570 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1571 	params->stats_queue = 0;
1572 	params->srq_id = 0;
1573 	params->use_srq = false;
1574 
1575 	if (!qp->srq) {
1576 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1577 
1578 	} else {
1579 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1580 		params->srq_id = qp->srq->srq_id;
1581 		params->use_srq = true;
1582 	}
1583 }
1584 
1585 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1586 {
1587 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1588 		 "qp=%p. "
1589 		 "sq_addr=0x%llx, "
1590 		 "sq_len=%zd, "
1591 		 "rq_addr=0x%llx, "
1592 		 "rq_len=%zd"
1593 		 "\n",
1594 		 qp,
1595 		 qp->usq.buf_addr,
1596 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1597 }
1598 
1599 static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
1600 			void *ptr, u32 id)
1601 {
1602 	int rc;
1603 
1604 	idr_preload(GFP_KERNEL);
1605 	spin_lock_irq(&qidr->idr_lock);
1606 
1607 	rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC);
1608 
1609 	spin_unlock_irq(&qidr->idr_lock);
1610 	idr_preload_end();
1611 
1612 	return rc < 0 ? rc : 0;
1613 }
1614 
1615 static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id)
1616 {
1617 	spin_lock_irq(&qidr->idr_lock);
1618 	idr_remove(&qidr->idr, id);
1619 	spin_unlock_irq(&qidr->idr_lock);
1620 }
1621 
1622 static inline void
1623 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1624 			    struct qedr_qp *qp,
1625 			    struct qed_rdma_create_qp_out_params *out_params)
1626 {
1627 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1628 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1629 
1630 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1631 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1632 	if (!qp->srq) {
1633 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1634 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1635 	}
1636 
1637 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1638 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1639 }
1640 
1641 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1642 {
1643 	if (qp->usq.umem)
1644 		ib_umem_release(qp->usq.umem);
1645 	qp->usq.umem = NULL;
1646 
1647 	if (qp->urq.umem)
1648 		ib_umem_release(qp->urq.umem);
1649 	qp->urq.umem = NULL;
1650 }
1651 
1652 static int qedr_create_user_qp(struct qedr_dev *dev,
1653 			       struct qedr_qp *qp,
1654 			       struct ib_pd *ibpd,
1655 			       struct ib_udata *udata,
1656 			       struct ib_qp_init_attr *attrs)
1657 {
1658 	struct qed_rdma_create_qp_in_params in_params;
1659 	struct qed_rdma_create_qp_out_params out_params;
1660 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1661 	struct qedr_create_qp_ureq ureq;
1662 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1663 	int rc = -EINVAL;
1664 
1665 	memset(&ureq, 0, sizeof(ureq));
1666 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1667 	if (rc) {
1668 		DP_ERR(dev, "Problem copying data from user space\n");
1669 		return rc;
1670 	}
1671 
1672 	/* SQ - read access only (0), dma sync not required (0) */
1673 	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1674 				  ureq.sq_len, 0, 0, alloc_and_init);
1675 	if (rc)
1676 		return rc;
1677 
1678 	if (!qp->srq) {
1679 		/* RQ - read access only (0), dma sync not required (0) */
1680 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1681 					  ureq.rq_len, 0, 0, alloc_and_init);
1682 		if (rc)
1683 			return rc;
1684 	}
1685 
1686 	memset(&in_params, 0, sizeof(in_params));
1687 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1688 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1689 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1690 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1691 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1692 	if (!qp->srq) {
1693 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1694 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1695 	}
1696 
1697 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1698 					      &in_params, &out_params);
1699 
1700 	if (!qp->qed_qp) {
1701 		rc = -ENOMEM;
1702 		goto err1;
1703 	}
1704 
1705 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1706 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1707 
1708 	qp->qp_id = out_params.qp_id;
1709 	qp->icid = out_params.icid;
1710 
1711 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1712 	if (rc)
1713 		goto err;
1714 
1715 	qedr_qp_user_print(dev, qp);
1716 
1717 	return 0;
1718 err:
1719 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1720 	if (rc)
1721 		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1722 
1723 err1:
1724 	qedr_cleanup_user(dev, qp);
1725 	return rc;
1726 }
1727 
1728 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1729 {
1730 	qp->sq.db = dev->db_addr +
1731 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1732 	qp->sq.db_data.data.icid = qp->icid;
1733 
1734 	qp->rq.db = dev->db_addr +
1735 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1736 	qp->rq.db_data.data.icid = qp->icid;
1737 	qp->rq.iwarp_db2 = dev->db_addr +
1738 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1739 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1740 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1741 }
1742 
1743 static int
1744 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1745 			   struct qedr_qp *qp,
1746 			   struct qed_rdma_create_qp_in_params *in_params,
1747 			   u32 n_sq_elems, u32 n_rq_elems)
1748 {
1749 	struct qed_rdma_create_qp_out_params out_params;
1750 	int rc;
1751 
1752 	rc = dev->ops->common->chain_alloc(dev->cdev,
1753 					   QED_CHAIN_USE_TO_PRODUCE,
1754 					   QED_CHAIN_MODE_PBL,
1755 					   QED_CHAIN_CNT_TYPE_U32,
1756 					   n_sq_elems,
1757 					   QEDR_SQE_ELEMENT_SIZE,
1758 					   &qp->sq.pbl, NULL);
1759 
1760 	if (rc)
1761 		return rc;
1762 
1763 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1764 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1765 
1766 	rc = dev->ops->common->chain_alloc(dev->cdev,
1767 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1768 					   QED_CHAIN_MODE_PBL,
1769 					   QED_CHAIN_CNT_TYPE_U32,
1770 					   n_rq_elems,
1771 					   QEDR_RQE_ELEMENT_SIZE,
1772 					   &qp->rq.pbl, NULL);
1773 	if (rc)
1774 		return rc;
1775 
1776 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1777 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1778 
1779 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1780 					      in_params, &out_params);
1781 
1782 	if (!qp->qed_qp)
1783 		return -EINVAL;
1784 
1785 	qp->qp_id = out_params.qp_id;
1786 	qp->icid = out_params.icid;
1787 
1788 	qedr_set_roce_db_info(dev, qp);
1789 	return rc;
1790 }
1791 
1792 static int
1793 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1794 			    struct qedr_qp *qp,
1795 			    struct qed_rdma_create_qp_in_params *in_params,
1796 			    u32 n_sq_elems, u32 n_rq_elems)
1797 {
1798 	struct qed_rdma_create_qp_out_params out_params;
1799 	struct qed_chain_ext_pbl ext_pbl;
1800 	int rc;
1801 
1802 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1803 						     QEDR_SQE_ELEMENT_SIZE,
1804 						     QED_CHAIN_MODE_PBL);
1805 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1806 						     QEDR_RQE_ELEMENT_SIZE,
1807 						     QED_CHAIN_MODE_PBL);
1808 
1809 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1810 					      in_params, &out_params);
1811 
1812 	if (!qp->qed_qp)
1813 		return -EINVAL;
1814 
1815 	/* Now we allocate the chain */
1816 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1817 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1818 
1819 	rc = dev->ops->common->chain_alloc(dev->cdev,
1820 					   QED_CHAIN_USE_TO_PRODUCE,
1821 					   QED_CHAIN_MODE_PBL,
1822 					   QED_CHAIN_CNT_TYPE_U32,
1823 					   n_sq_elems,
1824 					   QEDR_SQE_ELEMENT_SIZE,
1825 					   &qp->sq.pbl, &ext_pbl);
1826 
1827 	if (rc)
1828 		goto err;
1829 
1830 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1831 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1832 
1833 	rc = dev->ops->common->chain_alloc(dev->cdev,
1834 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1835 					   QED_CHAIN_MODE_PBL,
1836 					   QED_CHAIN_CNT_TYPE_U32,
1837 					   n_rq_elems,
1838 					   QEDR_RQE_ELEMENT_SIZE,
1839 					   &qp->rq.pbl, &ext_pbl);
1840 
1841 	if (rc)
1842 		goto err;
1843 
1844 	qp->qp_id = out_params.qp_id;
1845 	qp->icid = out_params.icid;
1846 
1847 	qedr_set_iwarp_db_info(dev, qp);
1848 	return rc;
1849 
1850 err:
1851 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1852 
1853 	return rc;
1854 }
1855 
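/* Release kernel QP resources: the SQ/RQ chains and the shadow wr_id arrays. */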
1856 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1857 {
1858 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1859 	kfree(qp->wqe_wr_id);
1860 
1861 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1862 	kfree(qp->rqe_wr_id);
1863 }
1864 
1865 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1866 				 struct qedr_qp *qp,
1867 				 struct ib_pd *ibpd,
1868 				 struct ib_qp_init_attr *attrs)
1869 {
1870 	struct qed_rdma_create_qp_in_params in_params;
1871 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1872 	int rc = -EINVAL;
1873 	u32 n_rq_elems;
1874 	u32 n_sq_elems;
1875 	u32 n_sq_entries;
1876 
1877 	memset(&in_params, 0, sizeof(in_params));
1878 
1879 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1880 	 * the ring. The ring should allow at least a single WR, even if the
1881 	 * user requested none, due to allocation issues.
1882 	 * We should add an extra WR since the prod and cons indices of
1883 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1884 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1885 	 * double the number of entries due to an iSER issue that pushes far
1886 	 * more WRs than indicated. If we decline its ib_post_send() then we
1887 	 * get error prints in dmesg that we'd like to avoid.
1888 	 */
1889 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1890 			      dev->attr.max_sqe);
1891 
1892 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1893 				GFP_KERNEL);
1894 	if (!qp->wqe_wr_id) {
1895 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1896 		return -ENOMEM;
1897 	}
1898 
1899 	/* QP handle to be written in CQE */
1900 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1901 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1902 
1903 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1904 	 * the ring. The ring should allow at least a single WR, even if the
1905 	 * user requested none, due to allocation issues.
1906 	 */
1907 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1908 
1909 	/* Allocate driver internal RQ array */
1910 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1911 				GFP_KERNEL);
1912 	if (!qp->rqe_wr_id) {
1913 		DP_ERR(dev,
1914 		       "create qp: failed RQ shadow memory allocation\n");
1915 		kfree(qp->wqe_wr_id);
1916 		return -ENOMEM;
1917 	}
1918 
1919 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1920 
1921 	n_sq_entries = attrs->cap.max_send_wr;
1922 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1923 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1924 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1925 
1926 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1927 
1928 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1929 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1930 						 n_sq_elems, n_rq_elems);
1931 	else
1932 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1933 						n_sq_elems, n_rq_elems);
1934 	if (rc)
1935 		qedr_cleanup_kernel(dev, qp);
1936 
1937 	return rc;
1938 }
1939 
1940 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1941 			     struct ib_qp_init_attr *attrs,
1942 			     struct ib_udata *udata)
1943 {
1944 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1945 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1946 	struct qedr_qp *qp;
1947 	struct ib_qp *ibqp;
1948 	int rc = 0;
1949 
1950 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1951 		 udata ? "user library" : "kernel", pd);
1952 
1953 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
1954 	if (rc)
1955 		return ERR_PTR(rc);
1956 
1957 	DP_DEBUG(dev, QEDR_MSG_QP,
1958 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1959 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1960 		 get_qedr_cq(attrs->send_cq),
1961 		 get_qedr_cq(attrs->send_cq)->icid,
1962 		 get_qedr_cq(attrs->recv_cq),
1963 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
1964 
1965 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1966 	if (!qp) {
1967 		DP_ERR(dev, "create qp: failed allocating memory\n");
1968 		return ERR_PTR(-ENOMEM);
1969 	}
1970 
1971 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1972 
1973 	if (attrs->qp_type == IB_QPT_GSI) {
1974 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1975 		if (IS_ERR(ibqp))
1976 			kfree(qp);
1977 		return ibqp;
1978 	}
1979 
1980 	if (udata)
1981 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1982 	else
1983 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1984 
1985 	if (rc)
1986 		goto err;
1987 
1988 	qp->ibqp.qp_num = qp->qp_id;
1989 
1990 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1991 		rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id);
1992 		if (rc)
1993 			goto err;
1994 	}
1995 
1996 	return &qp->ibqp;
1997 
1998 err:
1999 	kfree(qp);
2000 
2001 	return ERR_PTR(-EFAULT);
2002 }
2003 
2004 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2005 {
2006 	switch (qp_state) {
2007 	case QED_ROCE_QP_STATE_RESET:
2008 		return IB_QPS_RESET;
2009 	case QED_ROCE_QP_STATE_INIT:
2010 		return IB_QPS_INIT;
2011 	case QED_ROCE_QP_STATE_RTR:
2012 		return IB_QPS_RTR;
2013 	case QED_ROCE_QP_STATE_RTS:
2014 		return IB_QPS_RTS;
2015 	case QED_ROCE_QP_STATE_SQD:
2016 		return IB_QPS_SQD;
2017 	case QED_ROCE_QP_STATE_ERR:
2018 		return IB_QPS_ERR;
2019 	case QED_ROCE_QP_STATE_SQE:
2020 		return IB_QPS_SQE;
2021 	}
2022 	return IB_QPS_ERR;
2023 }
2024 
2025 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2026 					enum ib_qp_state qp_state)
2027 {
2028 	switch (qp_state) {
2029 	case IB_QPS_RESET:
2030 		return QED_ROCE_QP_STATE_RESET;
2031 	case IB_QPS_INIT:
2032 		return QED_ROCE_QP_STATE_INIT;
2033 	case IB_QPS_RTR:
2034 		return QED_ROCE_QP_STATE_RTR;
2035 	case IB_QPS_RTS:
2036 		return QED_ROCE_QP_STATE_RTS;
2037 	case IB_QPS_SQD:
2038 		return QED_ROCE_QP_STATE_SQD;
2039 	case IB_QPS_ERR:
2040 		return QED_ROCE_QP_STATE_ERR;
2041 	default:
2042 		return QED_ROCE_QP_STATE_ERR;
2043 	}
2044 }
2045 
2046 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2047 {
2048 	qed_chain_reset(&qph->pbl);
2049 	qph->prod = 0;
2050 	qph->cons = 0;
2051 	qph->wqe_cons = 0;
2052 	qph->db_data.data.value = cpu_to_le16(0);
2053 }
2054 
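/* Driver-side QP state machine: validate the requested transition from
 * cur_state and perform the related bookkeeping, e.g. resetting the HW queue
 * info on RESET->INIT and ringing the RQ doorbell on INIT->RTR for RoCE.
 */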
2055 static int qedr_update_qp_state(struct qedr_dev *dev,
2056 				struct qedr_qp *qp,
2057 				enum qed_roce_qp_state cur_state,
2058 				enum qed_roce_qp_state new_state)
2059 {
2060 	int status = 0;
2061 
2062 	if (new_state == cur_state)
2063 		return 0;
2064 
2065 	switch (cur_state) {
2066 	case QED_ROCE_QP_STATE_RESET:
2067 		switch (new_state) {
2068 		case QED_ROCE_QP_STATE_INIT:
2069 			qp->prev_wqe_size = 0;
2070 			qedr_reset_qp_hwq_info(&qp->sq);
2071 			qedr_reset_qp_hwq_info(&qp->rq);
2072 			break;
2073 		default:
2074 			status = -EINVAL;
2075 			break;
2076 		}
2077 		break;
2078 	case QED_ROCE_QP_STATE_INIT:
2079 		switch (new_state) {
2080 		case QED_ROCE_QP_STATE_RTR:
2081 			/* Update doorbell (in case post_recv was
2082 			 * done before move to RTR)
2083 			 */
2084 
2085 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2086 				writel(qp->rq.db_data.raw, qp->rq.db);
2087 				/* Make sure write takes effect */
2088 				mmiowb();
2089 			}
2090 			break;
2091 		case QED_ROCE_QP_STATE_ERR:
2092 			break;
2093 		default:
2094 			/* Invalid state change. */
2095 			status = -EINVAL;
2096 			break;
2097 		}
2098 		break;
2099 	case QED_ROCE_QP_STATE_RTR:
2100 		/* RTR->XXX */
2101 		switch (new_state) {
2102 		case QED_ROCE_QP_STATE_RTS:
2103 			break;
2104 		case QED_ROCE_QP_STATE_ERR:
2105 			break;
2106 		default:
2107 			/* Invalid state change. */
2108 			status = -EINVAL;
2109 			break;
2110 		}
2111 		break;
2112 	case QED_ROCE_QP_STATE_RTS:
2113 		/* RTS->XXX */
2114 		switch (new_state) {
2115 		case QED_ROCE_QP_STATE_SQD:
2116 			break;
2117 		case QED_ROCE_QP_STATE_ERR:
2118 			break;
2119 		default:
2120 			/* Invalid state change. */
2121 			status = -EINVAL;
2122 			break;
2123 		}
2124 		break;
2125 	case QED_ROCE_QP_STATE_SQD:
2126 		/* SQD->XXX */
2127 		switch (new_state) {
2128 		case QED_ROCE_QP_STATE_RTS:
2129 		case QED_ROCE_QP_STATE_ERR:
2130 			break;
2131 		default:
2132 			/* Invalid state change. */
2133 			status = -EINVAL;
2134 			break;
2135 		}
2136 		break;
2137 	case QED_ROCE_QP_STATE_ERR:
2138 		/* ERR->XXX */
2139 		switch (new_state) {
2140 		case QED_ROCE_QP_STATE_RESET:
2141 			if ((qp->rq.prod != qp->rq.cons) ||
2142 			    (qp->sq.prod != qp->sq.cons)) {
2143 				DP_NOTICE(dev,
2144 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2145 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2146 					  qp->sq.cons);
2147 				status = -EINVAL;
2148 			}
2149 			break;
2150 		default:
2151 			status = -EINVAL;
2152 			break;
2153 		}
2154 		break;
2155 	default:
2156 		status = -EINVAL;
2157 		break;
2158 	}
2159 
2160 	return status;
2161 }
2162 
2163 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2164 		   int attr_mask, struct ib_udata *udata)
2165 {
2166 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2167 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2168 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2169 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2170 	enum ib_qp_state old_qp_state, new_qp_state;
2171 	enum qed_roce_qp_state cur_state;
2172 	int rc = 0;
2173 
2174 	DP_DEBUG(dev, QEDR_MSG_QP,
2175 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2176 		 attr->qp_state);
2177 
2178 	old_qp_state = qedr_get_ibqp_state(qp->state);
2179 	if (attr_mask & IB_QP_STATE)
2180 		new_qp_state = attr->qp_state;
2181 	else
2182 		new_qp_state = old_qp_state;
2183 
2184 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2185 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2186 					ibqp->qp_type, attr_mask)) {
2187 			DP_ERR(dev,
2188 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2189 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2190 			       attr_mask, qp->qp_id, ibqp->qp_type,
2191 			       old_qp_state, new_qp_state);
2192 			rc = -EINVAL;
2193 			goto err;
2194 		}
2195 	}
2196 
2197 	/* Translate the masks... */
2198 	if (attr_mask & IB_QP_STATE) {
2199 		SET_FIELD(qp_params.modify_flags,
2200 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2201 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2202 	}
2203 
2204 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2205 		qp_params.sqd_async = true;
2206 
2207 	if (attr_mask & IB_QP_PKEY_INDEX) {
2208 		SET_FIELD(qp_params.modify_flags,
2209 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2210 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2211 			rc = -EINVAL;
2212 			goto err;
2213 		}
2214 
2215 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2216 	}
2217 
2218 	if (attr_mask & IB_QP_QKEY)
2219 		qp->qkey = attr->qkey;
2220 
2221 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2222 		SET_FIELD(qp_params.modify_flags,
2223 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2224 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2225 						  IB_ACCESS_REMOTE_READ;
2226 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2227 						   IB_ACCESS_REMOTE_WRITE;
2228 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2229 					       IB_ACCESS_REMOTE_ATOMIC;
2230 	}
2231 
2232 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2233 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2234 			return -EINVAL;
2235 
2236 		if (attr_mask & IB_QP_PATH_MTU) {
2237 			if (attr->path_mtu < IB_MTU_256 ||
2238 			    attr->path_mtu > IB_MTU_4096) {
2239 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2240 				rc = -EINVAL;
2241 				goto err;
2242 			}
2243 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2244 				      ib_mtu_enum_to_int(iboe_get_mtu
2245 							 (dev->ndev->mtu)));
2246 		}
2247 
2248 		if (!qp->mtu) {
2249 			qp->mtu =
2250 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2251 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2252 		}
2253 
2254 		SET_FIELD(qp_params.modify_flags,
2255 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2256 
2257 		qp_params.traffic_class_tos = grh->traffic_class;
2258 		qp_params.flow_label = grh->flow_label;
2259 		qp_params.hop_limit_ttl = grh->hop_limit;
2260 
2261 		qp->sgid_idx = grh->sgid_index;
2262 
2263 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2264 		if (rc) {
2265 			DP_ERR(dev,
2266 			       "modify qp: problems with GID index %d (rc=%d)\n",
2267 			       grh->sgid_index, rc);
2268 			return rc;
2269 		}
2270 
2271 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2272 				   qp_params.remote_mac_addr);
2273 		if (rc)
2274 			return rc;
2275 
2276 		qp_params.use_local_mac = true;
2277 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2278 
2279 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2280 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2281 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2282 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2283 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2284 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2285 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2286 			 qp_params.remote_mac_addr);
2287 
2288 		qp_params.mtu = qp->mtu;
2289 		qp_params.lb_indication = false;
2290 	}
2291 
2292 	if (!qp_params.mtu) {
2293 		/* Stay with current MTU */
2294 		if (qp->mtu)
2295 			qp_params.mtu = qp->mtu;
2296 		else
2297 			qp_params.mtu =
2298 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2299 	}
2300 
2301 	if (attr_mask & IB_QP_TIMEOUT) {
2302 		SET_FIELD(qp_params.modify_flags,
2303 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2304 
2305 		/* The received timeout value is an exponent used like this:
2306 		 *    "12.7.34 LOCAL ACK TIMEOUT
2307 		 *    Value representing the transport (ACK) timeout for use by
2308 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2309 		 * The FW expects timeout in msec so we need to divide the usec
2310 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2311 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2312 		 * The value of zero means infinite so we use a 'max_t' to make
2313 		 * sure that sub 1 msec values will be configured as 1 msec.
2314 		 */
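		 * For example, attr->timeout = 14 gives 4.096 * 2^14 usec ~= 67 msec,
		 * which is approximated here as 2^(14 - 8) = 64 msec.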
2315 		if (attr->timeout)
2316 			qp_params.ack_timeout =
2317 					1 << max_t(int, attr->timeout - 8, 0);
2318 		else
2319 			qp_params.ack_timeout = 0;
2320 	}
2321 
2322 	if (attr_mask & IB_QP_RETRY_CNT) {
2323 		SET_FIELD(qp_params.modify_flags,
2324 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2325 		qp_params.retry_cnt = attr->retry_cnt;
2326 	}
2327 
2328 	if (attr_mask & IB_QP_RNR_RETRY) {
2329 		SET_FIELD(qp_params.modify_flags,
2330 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2331 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2332 	}
2333 
2334 	if (attr_mask & IB_QP_RQ_PSN) {
2335 		SET_FIELD(qp_params.modify_flags,
2336 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2337 		qp_params.rq_psn = attr->rq_psn;
2338 		qp->rq_psn = attr->rq_psn;
2339 	}
2340 
2341 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2342 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2343 			rc = -EINVAL;
2344 			DP_ERR(dev,
2345 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2346 			       attr->max_rd_atomic,
2347 			       dev->attr.max_qp_req_rd_atomic_resc);
2348 			goto err;
2349 		}
2350 
2351 		SET_FIELD(qp_params.modify_flags,
2352 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2353 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2354 	}
2355 
2356 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2357 		SET_FIELD(qp_params.modify_flags,
2358 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2359 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2360 	}
2361 
2362 	if (attr_mask & IB_QP_SQ_PSN) {
2363 		SET_FIELD(qp_params.modify_flags,
2364 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2365 		qp_params.sq_psn = attr->sq_psn;
2366 		qp->sq_psn = attr->sq_psn;
2367 	}
2368 
2369 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2370 		if (attr->max_dest_rd_atomic >
2371 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2372 			DP_ERR(dev,
2373 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2374 			       attr->max_dest_rd_atomic,
2375 			       dev->attr.max_qp_resp_rd_atomic_resc);
2376 
2377 			rc = -EINVAL;
2378 			goto err;
2379 		}
2380 
2381 		SET_FIELD(qp_params.modify_flags,
2382 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2383 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2384 	}
2385 
2386 	if (attr_mask & IB_QP_DEST_QPN) {
2387 		SET_FIELD(qp_params.modify_flags,
2388 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2389 
2390 		qp_params.dest_qp = attr->dest_qp_num;
2391 		qp->dest_qp_num = attr->dest_qp_num;
2392 	}
2393 
2394 	cur_state = qp->state;
2395 
2396 	/* Update the QP state before the actual ramrod to prevent a race with
2397 	 * the fast path. Modifying the QP state to error will cause the device
2398 	 * to flush the CQEs, and polling those flushed CQEs would be considered
2399 	 * a potential issue if the QP isn't in the error state.
2400 	 */
2401 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2402 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2403 		qp->state = QED_ROCE_QP_STATE_ERR;
2404 
2405 	if (qp->qp_type != IB_QPT_GSI)
2406 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2407 					      qp->qed_qp, &qp_params);
2408 
2409 	if (attr_mask & IB_QP_STATE) {
2410 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2411 			rc = qedr_update_qp_state(dev, qp, cur_state,
2412 						  qp_params.new_state);
2413 		qp->state = qp_params.new_state;
2414 	}
2415 
2416 err:
2417 	return rc;
2418 }
2419 
2420 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2421 {
2422 	int ib_qp_acc_flags = 0;
2423 
2424 	if (params->incoming_rdma_write_en)
2425 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2426 	if (params->incoming_rdma_read_en)
2427 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2428 	if (params->incoming_atomic_en)
2429 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2430 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2431 	return ib_qp_acc_flags;
2432 }
2433 
2434 int qedr_query_qp(struct ib_qp *ibqp,
2435 		  struct ib_qp_attr *qp_attr,
2436 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2437 {
2438 	struct qed_rdma_query_qp_out_params params;
2439 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2440 	struct qedr_dev *dev = qp->dev;
2441 	int rc = 0;
2442 
2443 	memset(&params, 0, sizeof(params));
2444 
2445 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2446 	if (rc)
2447 		goto err;
2448 
2449 	memset(qp_attr, 0, sizeof(*qp_attr));
2450 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2451 
2452 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2453 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2454 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2455 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2456 	qp_attr->rq_psn = params.rq_psn;
2457 	qp_attr->sq_psn = params.sq_psn;
2458 	qp_attr->dest_qp_num = params.dest_qp;
2459 
2460 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2461 
2462 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2463 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2464 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2465 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2466 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2467 	qp_init_attr->cap = qp_attr->cap;
2468 
2469 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2470 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2471 			params.flow_label, qp->sgid_idx,
2472 			params.hop_limit_ttl, params.traffic_class_tos);
2473 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2474 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2475 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2476 	qp_attr->timeout = params.timeout;
2477 	qp_attr->rnr_retry = params.rnr_retry;
2478 	qp_attr->retry_cnt = params.retry_cnt;
2479 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2480 	qp_attr->pkey_index = params.pkey_index;
2481 	qp_attr->port_num = 1;
2482 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2483 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2484 	qp_attr->alt_pkey_index = 0;
2485 	qp_attr->alt_port_num = 0;
2486 	qp_attr->alt_timeout = 0;
2487 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2488 
2489 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2490 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2491 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2492 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2493 
2494 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2495 		 qp_attr->cap.max_inline_data);
2496 
2497 err:
2498 	return rc;
2499 }
2500 
2501 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2502 {
2503 	int rc = 0;
2504 
2505 	if (qp->qp_type != IB_QPT_GSI) {
2506 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2507 		if (rc)
2508 			return rc;
2509 	}
2510 
2511 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2512 		qedr_cleanup_user(dev, qp);
2513 	else
2514 		qedr_cleanup_kernel(dev, qp);
2515 
2516 	return 0;
2517 }
2518 
2519 int qedr_destroy_qp(struct ib_qp *ibqp)
2520 {
2521 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2522 	struct qedr_dev *dev = qp->dev;
2523 	struct ib_qp_attr attr;
2524 	int attr_mask = 0;
2525 	int rc = 0;
2526 
2527 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2528 		 qp, qp->qp_type);
2529 
2530 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2531 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2532 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2533 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2534 
2535 			attr.qp_state = IB_QPS_ERR;
2536 			attr_mask |= IB_QP_STATE;
2537 
2538 			/* Change the QP state to ERROR */
2539 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2540 		}
2541 	} else {
2542 		/* Wait for the connect/accept to complete */
2543 		if (qp->ep) {
2544 			int wait_count = 1;
2545 
2546 			while (qp->ep->during_connect) {
2547 				DP_DEBUG(dev, QEDR_MSG_QP,
2548 					 "Still in during connect/accept\n");
2549 
2550 				msleep(100);
2551 				if (wait_count++ > 200) {
2552 					DP_NOTICE(dev,
2553 						  "during connect timeout\n");
2554 					break;
2555 				}
2556 			}
2557 		}
2558 	}
2559 
2560 	if (qp->qp_type == IB_QPT_GSI)
2561 		qedr_destroy_gsi_qp(dev);
2562 
2563 	qedr_free_qp_resources(dev, qp);
2564 
2565 	if (atomic_dec_and_test(&qp->refcnt) &&
2566 	    rdma_protocol_iwarp(&dev->ibdev, 1)) {
2567 		qedr_idr_remove(dev, &dev->qpidr, qp->qp_id);
2568 		kfree(qp);
2569 	}
2570 	return rc;
2571 }
2572 
2573 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2574 			     u32 flags, struct ib_udata *udata)
2575 {
2576 	struct qedr_ah *ah;
2577 
2578 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2579 	if (!ah)
2580 		return ERR_PTR(-ENOMEM);
2581 
2582 	rdma_copy_ah_attr(&ah->attr, attr);
2583 
2584 	return &ah->ibah;
2585 }
2586 
2587 int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2588 {
2589 	struct qedr_ah *ah = get_qedr_ah(ibah);
2590 
2591 	rdma_destroy_ah_attr(&ah->attr);
2592 	kfree(ah);
2593 	return 0;
2594 }
2595 
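/* Move the MR's current PBL table and any in-use PBLs onto the free list and
 * release them all.
 */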
2596 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2597 {
2598 	struct qedr_pbl *pbl, *tmp;
2599 
2600 	if (info->pbl_table)
2601 		list_add_tail(&info->pbl_table->list_entry,
2602 			      &info->free_pbl_list);
2603 
2604 	if (!list_empty(&info->inuse_pbl_list))
2605 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2606 
2607 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2608 		list_del(&pbl->list_entry);
2609 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2610 	}
2611 }
2612 
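/* Prepare the PBL bookkeeping for an MR: compute the PBL layout for
 * page_list_len pages, allocate the primary PBL table and, opportunistically,
 * one spare table for the free list.
 */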
2613 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2614 			size_t page_list_len, bool two_layered)
2615 {
2616 	struct qedr_pbl *tmp;
2617 	int rc;
2618 
2619 	INIT_LIST_HEAD(&info->free_pbl_list);
2620 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2621 
2622 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2623 				  page_list_len, two_layered);
2624 	if (rc)
2625 		goto done;
2626 
2627 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2628 	if (IS_ERR(info->pbl_table)) {
2629 		rc = PTR_ERR(info->pbl_table);
2630 		goto done;
2631 	}
2632 
2633 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2634 		 &info->pbl_table->pa);
2635 
2636 	/* In the usual case we use 2 PBLs, so we add one to the free
2637 	 * list and allocate another one.
2638 	 */
2639 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2640 	if (IS_ERR(tmp)) {
2641 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2642 		goto done;
2643 	}
2644 
2645 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2646 
2647 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2648 
2649 done:
2650 	if (rc)
2651 		free_mr_info(dev, info);
2652 
2653 	return rc;
2654 }
2655 
2656 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2657 			       u64 usr_addr, int acc, struct ib_udata *udata)
2658 {
2659 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2660 	struct qedr_mr *mr;
2661 	struct qedr_pd *pd;
2662 	int rc = -ENOMEM;
2663 
2664 	pd = get_qedr_pd(ibpd);
2665 	DP_DEBUG(dev, QEDR_MSG_MR,
2666 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2667 		 pd->pd_id, start, len, usr_addr, acc);
2668 
2669 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2670 		return ERR_PTR(-EINVAL);
2671 
2672 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2673 	if (!mr)
2674 		return ERR_PTR(rc);
2675 
2676 	mr->type = QEDR_MR_USER;
2677 
2678 	mr->umem = ib_umem_get(udata, start, len, acc, 0);
2679 	if (IS_ERR(mr->umem)) {
2680 		rc = -EFAULT;
2681 		goto err0;
2682 	}
2683 
2684 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2685 	if (rc)
2686 		goto err1;
2687 
2688 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2689 			   &mr->info.pbl_info, PAGE_SHIFT);
2690 
2691 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2692 	if (rc) {
2693 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2694 		goto err1;
2695 	}
2696 
2697 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2698 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2699 	mr->hw_mr.key = 0;
2700 	mr->hw_mr.pd = pd->pd_id;
2701 	mr->hw_mr.local_read = 1;
2702 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2703 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2704 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2705 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2706 	mr->hw_mr.mw_bind = false;
2707 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2708 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2709 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2710 	mr->hw_mr.page_size_log = PAGE_SHIFT;
2711 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2712 	mr->hw_mr.length = len;
2713 	mr->hw_mr.vaddr = usr_addr;
2714 	mr->hw_mr.zbva = false;
2715 	mr->hw_mr.phy_mr = false;
2716 	mr->hw_mr.dma_mr = false;
2717 
2718 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2719 	if (rc) {
2720 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2721 		goto err2;
2722 	}
2723 
2724 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2725 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2726 	    mr->hw_mr.remote_atomic)
2727 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2728 
2729 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2730 		 mr->ibmr.lkey);
2731 	return &mr->ibmr;
2732 
2733 err2:
2734 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2735 err1:
2736 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2737 err0:
2738 	kfree(mr);
2739 	return ERR_PTR(rc);
2740 }
2741 
2742 int qedr_dereg_mr(struct ib_mr *ib_mr)
2743 {
2744 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2745 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2746 	int rc = 0;
2747 
2748 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2749 	if (rc)
2750 		return rc;
2751 
2752 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2753 
2754 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2755 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2756 
2757 	/* it could be user registered memory. */
2758 	/* It could be user-registered memory. */
2759 		ib_umem_release(mr->umem);
2760 
2761 	kfree(mr);
2762 
2763 	return rc;
2764 }
2765 
2766 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2767 				       int max_page_list_len)
2768 {
2769 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2770 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2771 	struct qedr_mr *mr;
2772 	int rc = -ENOMEM;
2773 
2774 	DP_DEBUG(dev, QEDR_MSG_MR,
2775 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2776 		 max_page_list_len);
2777 
2778 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2779 	if (!mr)
2780 		return ERR_PTR(rc);
2781 
2782 	mr->dev = dev;
2783 	mr->type = QEDR_MR_FRMR;
2784 
2785 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2786 	if (rc)
2787 		goto err0;
2788 
2789 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2790 	if (rc) {
2791 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2792 		goto err0;
2793 	}
2794 
2795 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2796 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2797 	mr->hw_mr.key = 0;
2798 	mr->hw_mr.pd = pd->pd_id;
2799 	mr->hw_mr.local_read = 1;
2800 	mr->hw_mr.local_write = 0;
2801 	mr->hw_mr.remote_read = 0;
2802 	mr->hw_mr.remote_write = 0;
2803 	mr->hw_mr.remote_atomic = 0;
2804 	mr->hw_mr.mw_bind = false;
2805 	mr->hw_mr.pbl_ptr = 0;
2806 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2807 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2808 	mr->hw_mr.fbo = 0;
2809 	mr->hw_mr.length = 0;
2810 	mr->hw_mr.vaddr = 0;
2811 	mr->hw_mr.zbva = false;
2812 	mr->hw_mr.phy_mr = true;
2813 	mr->hw_mr.dma_mr = false;
2814 
2815 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2816 	if (rc) {
2817 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2818 		goto err1;
2819 	}
2820 
2821 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2822 	mr->ibmr.rkey = mr->ibmr.lkey;
2823 
2824 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2825 	return mr;
2826 
2827 err1:
2828 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2829 err0:
2830 	kfree(mr);
2831 	return ERR_PTR(rc);
2832 }
2833 
2834 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2835 			    enum ib_mr_type mr_type, u32 max_num_sg)
2836 {
2837 	struct qedr_mr *mr;
2838 
2839 	if (mr_type != IB_MR_TYPE_MEM_REG)
2840 		return ERR_PTR(-EINVAL);
2841 
2842 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2843 
2844 	if (IS_ERR(mr))
2845 		return ERR_PTR(-EINVAL);
2846 
2847 	return &mr->ibmr;
2848 }
2849 
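/* ib_sg_to_pages() callback: store the next page address as a little-endian
 * regpair in the MR's PBL, failing once all PBEs have been used.
 */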
2850 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2851 {
2852 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2853 	struct qedr_pbl *pbl_table;
2854 	struct regpair *pbe;
2855 	u32 pbes_in_page;
2856 
2857 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2858 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2859 		return -ENOMEM;
2860 	}
2861 
2862 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2863 		 mr->npages, addr);
2864 
2865 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2866 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2867 	pbe = (struct regpair *)pbl_table->va;
2868 	pbe +=  mr->npages % pbes_in_page;
2869 	pbe->lo = cpu_to_le32((u32)addr);
2870 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2871 
2872 	mr->npages++;
2873 
2874 	return 0;
2875 }
2876 
2877 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2878 {
2879 	int work = info->completed - info->completed_handled - 1;
2880 
2881 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2882 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2883 		struct qedr_pbl *pbl;
2884 
2885 		/* Free all the page lists that can be freed (all the ones
2886 		 * that were invalidated), under the assumption that if an FMR
2887 		 * completed successfully, any invalidate operation posted
2888 		 * before it has also completed.
2889 		 */
2890 		pbl = list_first_entry(&info->inuse_pbl_list,
2891 				       struct qedr_pbl, list_entry);
2892 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2893 		info->completed_handled++;
2894 	}
2895 }
2896 
2897 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2898 		   int sg_nents, unsigned int *sg_offset)
2899 {
2900 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2901 
2902 	mr->npages = 0;
2903 
2904 	handle_completed_mrs(mr->dev, &mr->info);
2905 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2906 }
2907 
2908 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2909 {
2910 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2911 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2912 	struct qedr_mr *mr;
2913 	int rc;
2914 
2915 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2916 	if (!mr)
2917 		return ERR_PTR(-ENOMEM);
2918 
2919 	mr->type = QEDR_MR_DMA;
2920 
2921 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2922 	if (rc) {
2923 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2924 		goto err1;
2925 	}
2926 
2927 	/* index only, 18 bit long, lkey = itid << 8 | key */
2928 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2929 	mr->hw_mr.pd = pd->pd_id;
2930 	mr->hw_mr.local_read = 1;
2931 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2932 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2933 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2934 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2935 	mr->hw_mr.dma_mr = true;
2936 
2937 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2938 	if (rc) {
2939 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2940 		goto err2;
2941 	}
2942 
2943 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2944 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2945 	    mr->hw_mr.remote_atomic)
2946 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2947 
2948 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2949 	return &mr->ibmr;
2950 
2951 err2:
2952 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2953 err1:
2954 	kfree(mr);
2955 	return ERR_PTR(rc);
2956 }
2957 
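/* The WQ is considered full when advancing prod would make it wrap onto cons,
 * i.e. (prod + 1) % max_wr == cons.
 */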
2958 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2959 {
2960 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2961 }
2962 
2963 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2964 {
2965 	int i, len = 0;
2966 
2967 	for (i = 0; i < num_sge; i++)
2968 		len += sg_list[i].length;
2969 
2970 	return len;
2971 }
2972 
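/* Adjust the byte order of each 64-bit word in a WQE segment (used for the
 * inline-data segments copied below).
 */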
2973 static void swap_wqe_data64(u64 *p)
2974 {
2975 	int i;
2976 
2977 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2978 		*p = cpu_to_be64(cpu_to_le64(*p));
2979 }
2980 
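/* Copy the WR payload directly into SQ chain elements (inline data), set the
 * caller's inline flag bit and return the total data size; payloads larger
 * than ROCE_REQ_MAX_INLINE_DATA_SIZE are rejected through *bad_wr.
 */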
2981 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2982 				       struct qedr_qp *qp, u8 *wqe_size,
2983 				       const struct ib_send_wr *wr,
2984 				       const struct ib_send_wr **bad_wr,
2985 				       u8 *bits, u8 bit)
2986 {
2987 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2988 	char *seg_prt, *wqe;
2989 	int i, seg_siz;
2990 
2991 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2992 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2993 		*bad_wr = wr;
2994 		return 0;
2995 	}
2996 
2997 	if (!data_size)
2998 		return data_size;
2999 
3000 	*bits |= bit;
3001 
3002 	seg_prt = NULL;
3003 	wqe = NULL;
3004 	seg_siz = 0;
3005 
3006 	/* Copy data inline */
3007 	for (i = 0; i < wr->num_sge; i++) {
3008 		u32 len = wr->sg_list[i].length;
3009 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3010 
3011 		while (len > 0) {
3012 			u32 cur;
3013 
3014 			/* New segment required */
3015 			if (!seg_siz) {
3016 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3017 				seg_prt = wqe;
3018 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3019 				(*wqe_size)++;
3020 			}
3021 
3022 			/* Calculate currently allowed length */
3023 			cur = min_t(u32, len, seg_siz);
3024 			memcpy(seg_prt, src, cur);
3025 
3026 			/* Update segment variables */
3027 			seg_prt += cur;
3028 			seg_siz -= cur;
3029 
3030 			/* Update sge variables */
3031 			src += cur;
3032 			len -= cur;
3033 
3034 			/* Swap fully-completed segments */
3035 			if (!seg_siz)
3036 				swap_wqe_data64((u64 *)wqe);
3037 		}
3038 	}
3039 
3040 	/* swap last not completed segment */
3041 	/* Swap the last, partially filled segment */
3042 		swap_wqe_data64((u64 *)wqe);
3043 
3044 	return data_size;
3045 }
3046 
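/* Helpers for writing RQ SGEs, SRQ WQE headers and SRQ SGEs into ring
 * elements.
 */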
3047 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3048 	do {							\
3049 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3050 		(sge)->length = cpu_to_le32(vlength);		\
3051 		(sge)->flags = cpu_to_le32(vflags);		\
3052 	} while (0)
3053 
3054 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3055 	do {							\
3056 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3057 		(hdr)->num_sges = num_sge;			\
3058 	} while (0)
3059 
3060 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3061 	do {							\
3062 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3063 		(sge)->length = cpu_to_le32(vlength);		\
3064 		(sge)->l_key = cpu_to_le32(vlkey);		\
3065 	} while (0)
3066 
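/* Produce one rdma_sq_sge chain element per SG entry and return the total
 * payload length; *wqe_size (when provided) is increased by the SGE count.
 */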
3067 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3068 				const struct ib_send_wr *wr)
3069 {
3070 	u32 data_size = 0;
3071 	int i;
3072 
3073 	for (i = 0; i < wr->num_sge; i++) {
3074 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3075 
3076 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3077 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3078 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3079 		data_size += wr->sg_list[i].length;
3080 	}
3081 
3082 	if (wqe_size)
3083 		*wqe_size += wr->num_sge;
3084 
3085 	return data_size;
3086 }
3087 
3088 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3089 				     struct qedr_qp *qp,
3090 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3091 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3092 				     const struct ib_send_wr *wr,
3093 				     const struct ib_send_wr **bad_wr)
3094 {
3095 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3096 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3097 
3098 	if (wr->send_flags & IB_SEND_INLINE &&
3099 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3100 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3101 		u8 flags = 0;
3102 
3103 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3104 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3105 						   bad_wr, &rwqe->flags, flags);
3106 	}
3107 
3108 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3109 }
3110 
3111 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3112 				     struct qedr_qp *qp,
3113 				     struct rdma_sq_send_wqe_1st *swqe,
3114 				     struct rdma_sq_send_wqe_2st *swqe2,
3115 				     const struct ib_send_wr *wr,
3116 				     const struct ib_send_wr **bad_wr)
3117 {
3118 	memset(swqe2, 0, sizeof(*swqe2));
3119 	if (wr->send_flags & IB_SEND_INLINE) {
3120 		u8 flags = 0;
3121 
3122 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3123 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3124 						   bad_wr, &swqe->flags, flags);
3125 	}
3126 
3127 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3128 }
3129 
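/* Build the fast-MR (IB_WR_REG_MR) WQE: the first element carries the MR's
 * iova and key, the second element (consumed from the SQ chain here) carries
 * the access flags, page-size log, length and PBL address.
 */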
3130 static int qedr_prepare_reg(struct qedr_qp *qp,
3131 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3132 			    const struct ib_reg_wr *wr)
3133 {
3134 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3135 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3136 
3137 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3138 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3139 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3140 	fwqe1->l_key = wr->key;
3141 
3142 	fwqe2->access_ctrl = 0;
3143 
3144 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3145 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3146 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3147 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3148 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3149 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3150 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3151 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3152 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3153 	fwqe2->fmr_ctrl = 0;
3154 
3155 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3156 		   ilog2(mr->ibmr.page_size) - 12);
3157 
3158 	fwqe2->length_hi = 0;
3159 	fwqe2->length_lo = mr->ibmr.length;
3160 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3161 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3162 
3163 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3164 
3165 	return 0;
3166 }
3167 
3168 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3169 {
3170 	switch (opcode) {
3171 	case IB_WR_RDMA_WRITE:
3172 	case IB_WR_RDMA_WRITE_WITH_IMM:
3173 		return IB_WC_RDMA_WRITE;
3174 	case IB_WR_SEND_WITH_IMM:
3175 	case IB_WR_SEND:
3176 	case IB_WR_SEND_WITH_INV:
3177 		return IB_WC_SEND;
3178 	case IB_WR_RDMA_READ:
3179 	case IB_WR_RDMA_READ_WITH_INV:
3180 		return IB_WC_RDMA_READ;
3181 	case IB_WR_ATOMIC_CMP_AND_SWP:
3182 		return IB_WC_COMP_SWAP;
3183 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3184 		return IB_WC_FETCH_ADD;
3185 	case IB_WR_REG_MR:
3186 		return IB_WC_REG_MR;
3187 	case IB_WR_LOCAL_INV:
3188 		return IB_WC_LOCAL_INV;
3189 	default:
3190 		return IB_WC_SEND;
3191 	}
3192 }
3193 
3194 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3195 				      const struct ib_send_wr *wr)
3196 {
3197 	int wq_is_full, err_wr, pbl_is_full;
3198 	struct qedr_dev *dev = qp->dev;
3199 
3200 	/* prevent SQ overflow and/or processing of a bad WR */
3201 	err_wr = wr->num_sge > qp->sq.max_sges;
3202 	wq_is_full = qedr_wq_is_full(&qp->sq);
3203 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3204 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3205 	if (wq_is_full || err_wr || pbl_is_full) {
3206 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3207 			DP_ERR(dev,
3208 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3209 			       qp);
3210 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3211 		}
3212 
3213 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3214 			DP_ERR(dev,
3215 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3216 			       qp);
3217 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3218 		}
3219 
3220 		if (pbl_is_full &&
3221 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3222 			DP_ERR(dev,
3223 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3224 			       qp);
3225 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3226 		}
3227 		return false;
3228 	}
3229 	return true;
3230 }
3231 
3232 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3233 			    const struct ib_send_wr **bad_wr)
3234 {
3235 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3236 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3237 	struct rdma_sq_atomic_wqe_1st *awqe1;
3238 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3239 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3240 	struct rdma_sq_send_wqe_2st *swqe2;
3241 	struct rdma_sq_local_inv_wqe *iwqe;
3242 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3243 	struct rdma_sq_send_wqe_1st *swqe;
3244 	struct rdma_sq_rdma_wqe_1st *rwqe;
3245 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3246 	struct rdma_sq_common_wqe *wqe;
3247 	u32 length;
3248 	int rc = 0;
3249 	bool comp;
3250 
3251 	if (!qedr_can_post_send(qp, wr)) {
3252 		*bad_wr = wr;
3253 		return -ENOMEM;
3254 	}
3255 
3256 	wqe = qed_chain_produce(&qp->sq.pbl);
3257 	qp->wqe_wr_id[qp->sq.prod].signaled =
3258 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3259 
3260 	wqe->flags = 0;
3261 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3262 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3263 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3264 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3265 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3266 		   !!(wr->send_flags & IB_SEND_FENCE));
3267 	wqe->prev_wqe_size = qp->prev_wqe_size;
3268 
3269 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3270 
3271 	switch (wr->opcode) {
3272 	case IB_WR_SEND_WITH_IMM:
3273 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3274 			rc = -EINVAL;
3275 			*bad_wr = wr;
3276 			break;
3277 		}
3278 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3279 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3280 		swqe->wqe_size = 2;
3281 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3282 
3283 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3284 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3285 						   wr, bad_wr);
3286 		swqe->length = cpu_to_le32(length);
3287 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3288 		qp->prev_wqe_size = swqe->wqe_size;
3289 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3290 		break;
3291 	case IB_WR_SEND:
3292 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3293 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3294 
3295 		swqe->wqe_size = 2;
3296 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3297 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3298 						   wr, bad_wr);
3299 		swqe->length = cpu_to_le32(length);
3300 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3301 		qp->prev_wqe_size = swqe->wqe_size;
3302 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3303 		break;
3304 	case IB_WR_SEND_WITH_INV:
3305 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3306 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3307 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3308 		swqe->wqe_size = 2;
3309 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3310 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3311 						   wr, bad_wr);
3312 		swqe->length = cpu_to_le32(length);
3313 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3314 		qp->prev_wqe_size = swqe->wqe_size;
3315 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3316 		break;
3317 
3318 	case IB_WR_RDMA_WRITE_WITH_IMM:
3319 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3320 			rc = -EINVAL;
3321 			*bad_wr = wr;
3322 			break;
3323 		}
3324 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3325 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3326 
3327 		rwqe->wqe_size = 2;
3328 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3329 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3330 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3331 						   wr, bad_wr);
3332 		rwqe->length = cpu_to_le32(length);
3333 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3334 		qp->prev_wqe_size = rwqe->wqe_size;
3335 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3336 		break;
3337 	case IB_WR_RDMA_WRITE:
3338 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3339 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3340 
3341 		rwqe->wqe_size = 2;
3342 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3343 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3344 						   wr, bad_wr);
3345 		rwqe->length = cpu_to_le32(length);
3346 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3347 		qp->prev_wqe_size = rwqe->wqe_size;
3348 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3349 		break;
3350 	case IB_WR_RDMA_READ_WITH_INV:
3351 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3352 		/* fallthrough -- handled identically to RDMA READ */
3353 
3354 	case IB_WR_RDMA_READ:
3355 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3356 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3357 
3358 		rwqe->wqe_size = 2;
3359 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3360 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3361 						   wr, bad_wr);
3362 		rwqe->length = cpu_to_le32(length);
3363 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3364 		qp->prev_wqe_size = rwqe->wqe_size;
3365 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3366 		break;
3367 
3368 	case IB_WR_ATOMIC_CMP_AND_SWP:
3369 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3370 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3371 		awqe1->wqe_size = 4;
3372 
3373 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3374 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3375 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3376 
3377 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3378 
3379 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3380 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3381 			DMA_REGPAIR_LE(awqe3->swap_data,
3382 				       atomic_wr(wr)->compare_add);
3383 		} else {
3384 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3385 			DMA_REGPAIR_LE(awqe3->swap_data,
3386 				       atomic_wr(wr)->swap);
3387 			DMA_REGPAIR_LE(awqe3->cmp_data,
3388 				       atomic_wr(wr)->compare_add);
3389 		}
3390 
3391 		qedr_prepare_sq_sges(qp, NULL, wr);
3392 
3393 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3394 		qp->prev_wqe_size = awqe1->wqe_size;
3395 		break;
3396 
3397 	case IB_WR_LOCAL_INV:
3398 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3399 		iwqe->wqe_size = 1;
3400 
3401 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3402 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3403 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3404 		qp->prev_wqe_size = iwqe->wqe_size;
3405 		break;
3406 	case IB_WR_REG_MR:
3407 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3408 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3409 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3410 		fwqe1->wqe_size = 2;
3411 
3412 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3413 		if (rc) {
3414 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3415 			*bad_wr = wr;
3416 			break;
3417 		}
3418 
3419 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3420 		qp->prev_wqe_size = fwqe1->wqe_size;
3421 		break;
3422 	default:
3423 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3424 		rc = -EINVAL;
3425 		*bad_wr = wr;
3426 		break;
3427 	}
3428 
3429 	if (*bad_wr) {
3430 		u16 value;
3431 
3432 		/* Restore prod to its position before
3433 		 * this WR was processed
3434 		 */
3435 		value = le16_to_cpu(qp->sq.db_data.data.value);
3436 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3437 
3438 		/* Restore prev_wqe_size */
3439 		qp->prev_wqe_size = wqe->prev_wqe_size;
3440 		rc = -EINVAL;
3441 		DP_ERR(dev, "POST SEND FAILED\n");
3442 	}
3443 
3444 	return rc;
3445 }
3446 
3447 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3448 		   const struct ib_send_wr **bad_wr)
3449 {
3450 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3451 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3452 	unsigned long flags;
3453 	int rc = 0;
3454 
3455 	*bad_wr = NULL;
3456 
3457 	if (qp->qp_type == IB_QPT_GSI)
3458 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3459 
3460 	spin_lock_irqsave(&qp->q_lock, flags);
3461 
3462 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3463 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3464 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3465 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3466 			spin_unlock_irqrestore(&qp->q_lock, flags);
3467 			*bad_wr = wr;
3468 			DP_DEBUG(dev, QEDR_MSG_CQ,
3469 				 "QP in wrong state! QP icid=0x%x state %d\n",
3470 				 qp->icid, qp->state);
3471 			return -EINVAL;
3472 		}
3473 	}
3474 
3475 	while (wr) {
3476 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3477 		if (rc)
3478 			break;
3479 
3480 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3481 
3482 		qedr_inc_sw_prod(&qp->sq);
3483 
3484 		qp->sq.db_data.data.value++;
3485 
3486 		wr = wr->next;
3487 	}
3488 
3489 	/* Trigger doorbell
3490 	 * If there was a failure in the first WR then it will be triggered in
3491 	 * vain. However, this is not harmful (as long as the producer value is
3492 	 * unchanged). For performance reasons we avoid checking for this
3493 	 * redundant doorbell.
3494 	 *
3495 	 * qp->wqe_wr_id is accessed during qedr_poll_cq; as
3496 	 * soon as we give the doorbell, we could get a completion
3497 	 * for this WR, so we need to make sure that the
3498 	 * memory is updated before giving the doorbell.
3499 	 * During qedr_poll_cq, rmb is called before accessing the
3500 	 * cqe. This covers for the smp_rmb as well.
3501 	 */
3502 	smp_wmb();
3503 	writel(qp->sq.db_data.raw, qp->sq.db);
3504 
3505 	/* Make sure write sticks */
3506 	mmiowb();
3507 
3508 	spin_unlock_irqrestore(&qp->q_lock, flags);
3509 
3510 	return rc;
3511 }
3512 
3513 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3514 {
3515 	u32 used;
3516 
3517 	/* Calculate the number of elements used based on the producer
3518 	 * and consumer counts, and subtract it from the maximum number
3519 	 * of work requests supported, so that we get the elements left.
3520 	 */
3521 	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3522 
3523 	return hw_srq->max_wr - used;
3524 }
3525 
3526 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3527 		       const struct ib_recv_wr **bad_wr)
3528 {
3529 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3530 	struct qedr_srq_hwq_info *hw_srq;
3531 	struct qedr_dev *dev = srq->dev;
3532 	struct qed_chain *pbl;
3533 	unsigned long flags;
3534 	int status = 0;
3535 	u32 num_sge;
3536 	u32 offset;
3537 
3538 	spin_lock_irqsave(&srq->lock, flags);
3539 
3540 	hw_srq = &srq->hw_srq;
3541 	pbl = &srq->hw_srq.pbl;
3542 	while (wr) {
3543 		struct rdma_srq_wqe_header *hdr;
3544 		int i;
3545 
3546 		if (!qedr_srq_elem_left(hw_srq) ||
3547 		    wr->num_sge > srq->hw_srq.max_sges) {
3548 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3549 			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3550 			       wr->num_sge, srq->hw_srq.max_sges);
3551 			status = -ENOMEM;
3552 			*bad_wr = wr;
3553 			break;
3554 		}
3555 
3556 		hdr = qed_chain_produce(pbl);
3557 		num_sge = wr->num_sge;
3558 		/* Set number of sge and work request id in header */
3559 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3560 
3561 		srq->hw_srq.wr_prod_cnt++;
3562 		hw_srq->wqe_prod++;
3563 		hw_srq->sge_prod++;
3564 
3565 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3566 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3567 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3568 
3569 		for (i = 0; i < wr->num_sge; i++) {
3570 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3571 
3572 			/* Set SGE length, lkey and address */
3573 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3574 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3575 
3576 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3577 				 "[%d]: len %d key %x addr %x:%x\n",
3578 				 i, srq_sge->length, srq_sge->l_key,
3579 				 srq_sge->addr.hi, srq_sge->addr.lo);
3580 			hw_srq->sge_prod++;
3581 		}
3582 
3583 		/* Flush WQE and SGE information before
3584 		 * updating producer.
3585 		 */
3586 		wmb();
3587 
3588 		/* The SRQ producer pair is 8 bytes: the SGE producer index
3589 		 * occupies the first 4 bytes and the WQE producer the next
3590 		 * 4 bytes. Both must be updated.
3591 		 */
3592 		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3593 		offset = offsetof(struct rdma_srq_producers, wqe_prod);
3594 		*((u32 *)((u8 *)srq->hw_srq.virt_prod_pair_addr + offset)) =
3595 			hw_srq->wqe_prod;
3596 
3597 		/* Flush producer after updating it. */
3598 		wmb();
3599 		wr = wr->next;
3600 	}
3601 
3602 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3603 		 qed_chain_get_elem_left(pbl));
3604 	spin_unlock_irqrestore(&srq->lock, flags);
3605 
3606 	return status;
3607 }
3608 
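/* Post receive WRs to a QP's RQ. GSI QPs are handed off to the GSI path;
 * otherwise each WR consumes one RQ chain element per SGE (or a single
 * zero-length SGE when none are given) and the RQ doorbell is rung per WR.
 */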
3609 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3610 		   const struct ib_recv_wr **bad_wr)
3611 {
3612 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3613 	struct qedr_dev *dev = qp->dev;
3614 	unsigned long flags;
3615 	int status = 0;
3616 
3617 	if (qp->qp_type == IB_QPT_GSI)
3618 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3619 
3620 	spin_lock_irqsave(&qp->q_lock, flags);
3621 
3622 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3623 		spin_unlock_irqrestore(&qp->q_lock, flags);
3624 		*bad_wr = wr;
3625 		return -EINVAL;
3626 	}
3627 
3628 	while (wr) {
3629 		int i;
3630 
3631 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3632 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3633 		    wr->num_sge > qp->rq.max_sges) {
3634 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3635 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3636 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3637 			       qp->rq.max_sges);
3638 			status = -ENOMEM;
3639 			*bad_wr = wr;
3640 			break;
3641 		}
3642 		for (i = 0; i < wr->num_sge; i++) {
3643 			u32 flags = 0;
3644 			struct rdma_rq_sge *rqe =
3645 			    qed_chain_produce(&qp->rq.pbl);
3646 
3647 			/* The first SGE must include the number
3648 			 * of SGEs in the list
3649 			 */
3650 			if (!i)
3651 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3652 					  wr->num_sge);
3653 
3654 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3655 				  wr->sg_list[i].lkey);
3656 
3657 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3658 				   wr->sg_list[i].length, flags);
3659 		}
3660 
3661 		/* Special case of no SGEs: the FW requires between 1 and 4
3662 		 * SGEs, so post a single SGE with length zero. This is needed
3663 		 * because an RDMA write with immediate consumes an RQ entry.
3664 		 */
3665 		if (!wr->num_sge) {
3666 			u32 flags = 0;
3667 			struct rdma_rq_sge *rqe =
3668 			    qed_chain_produce(&qp->rq.pbl);
3669 
3670 			/* The first SGE must include the number
3671 			 * of SGEs in the list
3672 			 */
3673 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3674 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3675 
3676 			RQ_SGE_SET(rqe, 0, 0, flags);
3677 			i = 1;
3678 		}
3679 
3680 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3681 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3682 
3683 		qedr_inc_sw_prod(&qp->rq);
3684 
3685 		/* qp->rqe_wr_id is accessed during qedr_poll_cq; as soon as we
3686 		 * ring the doorbell we could get a completion for this WR, so
3687 		 * we must make sure that memory is updated before ringing the
3688 		 * doorbell.
3689 		 * During qedr_poll_cq an rmb() is issued before accessing the
3690 		 * CQE, which also provides the required smp_rmb() pairing.
3691 		 */
3692 		smp_wmb();
3693 
3694 		qp->rq.db_data.data.value++;
3695 
3696 		writel(qp->rq.db_data.raw, qp->rq.db);
3697 
3698 		/* Make sure write sticks */
3699 		mmiowb();
3700 
3701 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3702 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3703 			mmiowb();	/* for second doorbell */
3704 		}
3705 
3706 		wr = wr->next;
3707 	}
3708 
3709 	spin_unlock_irqrestore(&qp->q_lock, flags);
3710 
3711 	return status;
3712 }
3713 
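/* A CQE still needs processing while its toggle bit matches the CQ's current
 * toggle value.
 */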
3714 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3715 {
3716 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3717 
3718 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3719 		cq->pbl_toggle;
3720 }
3721 
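/* Recover the driver's qedr_qp pointer from the 64-bit qp_handle carried in
 * the CQE (split by the FW into hi/lo 32-bit words).
 */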
3722 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3723 {
3724 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3725 	struct qedr_qp *qp;
3726 
3727 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3728 						   resp_cqe->qp_handle.lo,
3729 						   u64);
3730 	return qp;
3731 }
3732 
3733 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3734 {
3735 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3736 
3737 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3738 }
3739 
3740 /* Return latest CQE (needs processing) */
3741 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3742 {
3743 	return cq->latest_cqe;
3744 }
3745 
3746 /* For FMR we need to increment the 'completed' counter that the FMR
3747  * algorithm uses to determine whether a PBL can be freed.
3748  * This must be done whether or not the work request was signaled, so this
3749  * function is called from the condition that checks whether a WC should be
3750  * skipped, to make sure the counter update is not missed (possibly this FMR
3751  * operation was not signaled).
3752  */
3753 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3754 {
3755 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3756 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3757 }
3758 
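/* Generate SQ work completions from the SW consumer up to hw_cons. Unsignaled
 * WRs are skipped unless @force is set (flush), but their WQEs are still
 * consumed from the SQ chain.
 */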
3759 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3760 		       struct qedr_cq *cq, int num_entries,
3761 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3762 		       int force)
3763 {
3764 	u16 cnt = 0;
3765 
3766 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3767 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3768 			qedr_chk_if_fmr(qp);
3769 			/* skip WC */
3770 			goto next_cqe;
3771 		}
3772 
3773 		/* fill WC */
3774 		wc->status = status;
3775 		wc->vendor_err = 0;
3776 		wc->wc_flags = 0;
3777 		wc->src_qp = qp->id;
3778 		wc->qp = &qp->ibqp;
3779 
3780 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3781 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3782 
3783 		switch (wc->opcode) {
3784 		case IB_WC_RDMA_WRITE:
3785 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3786 			break;
3787 		case IB_WC_COMP_SWAP:
3788 		case IB_WC_FETCH_ADD:
3789 			wc->byte_len = 8;
3790 			break;
3791 		case IB_WC_REG_MR:
3792 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3793 			break;
3794 		case IB_WC_RDMA_READ:
3795 		case IB_WC_SEND:
3796 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3797 			break;
3798 		default:
3799 			break;
3800 		}
3801 
3802 		num_entries--;
3803 		wc++;
3804 		cnt++;
3805 next_cqe:
3806 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3807 			qed_chain_consume(&qp->sq.pbl);
3808 		qedr_inc_sw_cons(&qp->sq);
3809 	}
3810 
3811 	return cnt;
3812 }
3813 
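/* Handle a requester CQE: on success or flush, complete the covered SQ WRs;
 * for any other status, complete the preceding WRs as successful and report
 * the failed WR with a status translated from the FW error code.
 */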
3814 static int qedr_poll_cq_req(struct qedr_dev *dev,
3815 			    struct qedr_qp *qp, struct qedr_cq *cq,
3816 			    int num_entries, struct ib_wc *wc,
3817 			    struct rdma_cqe_requester *req)
3818 {
3819 	int cnt = 0;
3820 
3821 	switch (req->status) {
3822 	case RDMA_CQE_REQ_STS_OK:
3823 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3824 				  IB_WC_SUCCESS, 0);
3825 		break;
3826 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3827 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3828 			DP_DEBUG(dev, QEDR_MSG_CQ,
3829 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3830 				 cq->icid, qp->icid);
3831 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3832 				  IB_WC_WR_FLUSH_ERR, 1);
3833 		break;
3834 	default:
3835 		/* process all WQEs before the failed one as successful */
3836 		qp->state = QED_ROCE_QP_STATE_ERR;
3837 		cnt = process_req(dev, qp, cq, num_entries, wc,
3838 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3839 		wc += cnt;
3840 		/* if we have extra WC fill it with actual error info */
3841 		if (cnt < num_entries) {
3842 			enum ib_wc_status wc_status;
3843 
3844 			switch (req->status) {
3845 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3846 				DP_ERR(dev,
3847 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3848 				       cq->icid, qp->icid);
3849 				wc_status = IB_WC_BAD_RESP_ERR;
3850 				break;
3851 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3852 				DP_ERR(dev,
3853 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3854 				       cq->icid, qp->icid);
3855 				wc_status = IB_WC_LOC_LEN_ERR;
3856 				break;
3857 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3858 				DP_ERR(dev,
3859 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3860 				       cq->icid, qp->icid);
3861 				wc_status = IB_WC_LOC_QP_OP_ERR;
3862 				break;
3863 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3864 				DP_ERR(dev,
3865 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3866 				       cq->icid, qp->icid);
3867 				wc_status = IB_WC_LOC_PROT_ERR;
3868 				break;
3869 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3870 				DP_ERR(dev,
3871 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3872 				       cq->icid, qp->icid);
3873 				wc_status = IB_WC_MW_BIND_ERR;
3874 				break;
3875 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3876 				DP_ERR(dev,
3877 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3878 				       cq->icid, qp->icid);
3879 				wc_status = IB_WC_REM_INV_REQ_ERR;
3880 				break;
3881 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3882 				DP_ERR(dev,
3883 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3884 				       cq->icid, qp->icid);
3885 				wc_status = IB_WC_REM_ACCESS_ERR;
3886 				break;
3887 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3888 				DP_ERR(dev,
3889 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3890 				       cq->icid, qp->icid);
3891 				wc_status = IB_WC_REM_OP_ERR;
3892 				break;
3893 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3894 				DP_ERR(dev,
3895 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3896 				       cq->icid, qp->icid);
3897 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3898 				break;
3899 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3900 				DP_ERR(dev,
3901 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3902 				       cq->icid, qp->icid);
3903 				wc_status = IB_WC_RETRY_EXC_ERR;
3904 				break;
3905 			default:
3906 				DP_ERR(dev,
3907 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3908 				       cq->icid, qp->icid);
3909 				wc_status = IB_WC_GENERAL_ERR;
3910 			}
3911 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3912 					   wc_status, 1);
3913 		}
3914 	}
3915 
3916 	return cnt;
3917 }
3918 
3919 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3920 {
3921 	switch (status) {
3922 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3923 		return IB_WC_LOC_ACCESS_ERR;
3924 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3925 		return IB_WC_LOC_LEN_ERR;
3926 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3927 		return IB_WC_LOC_QP_OP_ERR;
3928 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3929 		return IB_WC_LOC_PROT_ERR;
3930 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3931 		return IB_WC_MW_BIND_ERR;
3932 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3933 		return IB_WC_REM_INV_RD_REQ_ERR;
3934 	case RDMA_CQE_RESP_STS_OK:
3935 		return IB_WC_SUCCESS;
3936 	default:
3937 		return IB_WC_GENERAL_ERR;
3938 	}
3939 }
3940 
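/* Fill a successful receive WC from the responder CQE flags. Immediate data
 * and invalidate are mutually exclusive, and RDMA may only be combined with
 * immediate data; contradictory flag combinations are reported as invalid.
 */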
3941 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3942 					  struct ib_wc *wc)
3943 {
3944 	wc->status = IB_WC_SUCCESS;
3945 	wc->byte_len = le32_to_cpu(resp->length);
3946 
3947 	if (resp->flags & QEDR_RESP_IMM) {
3948 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3949 		wc->wc_flags |= IB_WC_WITH_IMM;
3950 
3951 		if (resp->flags & QEDR_RESP_RDMA)
3952 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3953 
3954 		if (resp->flags & QEDR_RESP_INV)
3955 			return -EINVAL;
3956 
3957 	} else if (resp->flags & QEDR_RESP_INV) {
3958 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3959 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3960 
3961 		if (resp->flags & QEDR_RESP_RDMA)
3962 			return -EINVAL;
3963 
3964 	} else if (resp->flags & QEDR_RESP_RDMA) {
3965 		return -EINVAL;
3966 	}
3967 
3968 	return 0;
3969 }
3970 
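/* Build a receive work completion from a responder CQE, translating the FW
 * status and flags into IB WC fields.
 */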
3971 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3972 			       struct qedr_cq *cq, struct ib_wc *wc,
3973 			       struct rdma_cqe_responder *resp, u64 wr_id)
3974 {
3975 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3976 	wc->opcode = IB_WC_RECV;
3977 	wc->wc_flags = 0;
3978 
3979 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3980 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3981 			DP_ERR(dev,
3982 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3983 			       cq, cq->icid, resp->flags);
3984 
3985 	} else {
3986 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3987 		if (wc->status == IB_WC_GENERAL_ERR)
3988 			DP_ERR(dev,
3989 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3990 			       cq, cq->icid, resp->status);
3991 	}
3992 
3993 	/* Fill the rest of the WC */
3994 	wc->vendor_err = 0;
3995 	wc->src_qp = qp->id;
3996 	wc->qp = &qp->ibqp;
3997 	wc->wr_id = wr_id;
3998 }
3999 
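/* SRQ receive completion: the wr_id is carried in the CQE itself (srq_wr_id)
 * rather than taken from a per-QP ring, and the SRQ consumer count is
 * advanced.
 */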
4000 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4001 				struct qedr_cq *cq, struct ib_wc *wc,
4002 				struct rdma_cqe_responder *resp)
4003 {
4004 	struct qedr_srq *srq = qp->srq;
4005 	u64 wr_id;
4006 
4007 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4008 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4009 
4010 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4011 		wc->status = IB_WC_WR_FLUSH_ERR;
4012 		wc->vendor_err = 0;
4013 		wc->wr_id = wr_id;
4014 		wc->byte_len = 0;
4015 		wc->src_qp = qp->id;
4016 		wc->qp = &qp->ibqp;
4018 	} else {
4019 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4020 	}
4021 	srq->hw_srq.wr_cons_cnt++;
4022 
4023 	return 1;
4024 }
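
/* Complete a single RQ WR: build the WC, then consume its WQEs from the RQ
 * chain and advance the SW consumer.
 */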
4025 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4026 			    struct qedr_cq *cq, struct ib_wc *wc,
4027 			    struct rdma_cqe_responder *resp)
4028 {
4029 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4030 
4031 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4032 
4033 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4034 		qed_chain_consume(&qp->rq.pbl);
4035 	qedr_inc_sw_cons(&qp->rq);
4036 
4037 	return 1;
4038 }
4039 
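/* Flush outstanding RQ WRs up to hw_cons, completing each with
 * IB_WC_WR_FLUSH_ERR and consuming its WQEs from the RQ chain.
 */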
4040 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4041 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4042 {
4043 	u16 cnt = 0;
4044 
4045 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4046 		/* fill WC */
4047 		wc->status = IB_WC_WR_FLUSH_ERR;
4048 		wc->vendor_err = 0;
4049 		wc->wc_flags = 0;
4050 		wc->src_qp = qp->id;
4051 		wc->byte_len = 0;
4052 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4053 		wc->qp = &qp->ibqp;
4054 		num_entries--;
4055 		wc++;
4056 		cnt++;
4057 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4058 			qed_chain_consume(&qp->rq.pbl);
4059 		qedr_inc_sw_cons(&qp->rq);
4060 	}
4061 
4062 	return cnt;
4063 }
4064 
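/* Consume the responder CQE only once the RQ consumer index has caught up
 * with the rq_cons value the CQE carries.
 */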
4065 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4066 				 struct rdma_cqe_responder *resp, int *update)
4067 {
4068 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4069 		consume_cqe(cq);
4070 		*update |= 1;
4071 	}
4072 }
4073 
4074 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4075 				 struct qedr_cq *cq, int num_entries,
4076 				 struct ib_wc *wc,
4077 				 struct rdma_cqe_responder *resp)
4078 {
4079 	int cnt;
4080 
4081 	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4082 	consume_cqe(cq);
4083 
4084 	return cnt;
4085 }
4086 
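/* Responder CQEs: a flush status covers all outstanding RQ WRs up to the
 * reported consumer index; anything else completes exactly one WR.
 */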
4087 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4088 			     struct qedr_cq *cq, int num_entries,
4089 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4090 			     int *update)
4091 {
4092 	int cnt;
4093 
4094 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4095 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4096 					 resp->rq_cons_or_srq_id);
4097 		try_consume_resp_cqe(cq, qp, resp, update);
4098 	} else {
4099 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4100 		consume_cqe(cq);
4101 		*update |= 1;
4102 	}
4103 
4104 	return cnt;
4105 }
4106 
4107 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4108 				struct rdma_cqe_requester *req, int *update)
4109 {
4110 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4111 		consume_cqe(cq);
4112 		*update |= 1;
4113 	}
4114 }
4115 
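/* Poll completions: walk the valid CQEs, dispatch by CQE type, then advance
 * cq->cq_cons by the number of chain elements consumed and, if anything was
 * consumed, update the CQ doorbell.
 */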
4116 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4117 {
4118 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4119 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4120 	union rdma_cqe *cqe;
4121 	u32 old_cons, new_cons;
4122 	unsigned long flags;
4123 	int update = 0;
4124 	int done = 0;
4125 
4126 	if (cq->destroyed) {
4127 		DP_ERR(dev,
4128 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4129 		       cq, cq->icid);
4130 		return 0;
4131 	}
4132 
4133 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4134 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4135 
4136 	spin_lock_irqsave(&cq->cq_lock, flags);
4137 	cqe = cq->latest_cqe;
4138 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4139 	while (num_entries && is_valid_cqe(cq, cqe)) {
4140 		struct qedr_qp *qp;
4141 		int cnt = 0;
4142 
4143 		/* prevent speculative reads of any field of CQE */
4144 		rmb();
4145 
4146 		qp = cqe_get_qp(cqe);
4147 		if (!qp) {
4148 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4149 			break;
4150 		}
4151 
4152 		wc->qp = &qp->ibqp;
4153 
4154 		switch (cqe_get_type(cqe)) {
4155 		case RDMA_CQE_TYPE_REQUESTER:
4156 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4157 					       &cqe->req);
4158 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4159 			break;
4160 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4161 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4162 						&cqe->resp, &update);
4163 			break;
4164 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4165 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4166 						    wc, &cqe->resp);
4167 			update = 1;
4168 			break;
4169 		case RDMA_CQE_TYPE_INVALID:
4170 		default:
4171 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4172 			       cqe_get_type(cqe));
4173 		}
4174 		num_entries -= cnt;
4175 		wc += cnt;
4176 		done += cnt;
4177 
4178 		cqe = get_cqe(cq);
4179 	}
4180 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4181 
4182 	cq->cq_cons += new_cons - old_cons;
4183 
4184 	if (update)
4185 		/* The doorbell notifies about the latest VALID entry,
4186 		 * but the chain already points to the next INVALID one.
4187 		 */
4188 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4189 
4190 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4191 	return done;
4192 }
4193 
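/* qedr does not implement MAD processing; the MAD header is logged for debug
 * and IB_MAD_RESULT_SUCCESS is returned.
 */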
4194 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4195 		     u8 port_num,
4196 		     const struct ib_wc *in_wc,
4197 		     const struct ib_grh *in_grh,
4198 		     const struct ib_mad_hdr *mad_hdr,
4199 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
4200 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4201 {
4202 	struct qedr_dev *dev = get_qedr_dev(ibdev);
4203 
4204 	DP_DEBUG(dev, QEDR_MSG_GSI,
4205 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
4206 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
4207 		 mad_hdr->class_specific, mad_hdr->class_version,
4208 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
4209 	return IB_MAD_RESULT_SUCCESS;
4210 }
4211