xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision a61127c2)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 
55 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
56 #define	RDMA_MAX_SGE_PER_SRQ	(4)
57 #define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
58 
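/* DB_ADDR_SHIFT() converts a DQ PWM doorbell offset (e.g.
 * DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD) into the byte offset that is added to the
 * doorbell base address (dev->db_addr or a user DPI) when forming a doorbell
 * pointer.
 */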
59 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
60 
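/* Clamp the response copy to the user-space output buffer size so that older
 * user libraries, built against a smaller response structure, keep working.
 */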
61 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
62 					size_t len)
63 {
64 	size_t min_len = min_t(size_t, len, udata->outlen);
65 
66 	return ib_copy_to_udata(udata, src, min_len);
67 }
68 
69 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
70 {
71 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
72 		return -EINVAL;
73 
74 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
75 	return 0;
76 }
77 
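/* For iWARP the source GID is simply derived from the netdev MAC address;
 * RoCE GIDs are served from the IB core GID cache instead.
 */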
78 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
79 		      int index, union ib_gid *sgid)
80 {
81 	struct qedr_dev *dev = get_qedr_dev(ibdev);
82 
83 	memset(sgid->raw, 0, sizeof(sgid->raw));
84 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
85 
86 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
87 		 sgid->global.interface_id, sgid->global.subnet_prefix);
88 
89 	return 0;
90 }
91 
92 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
93 {
94 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
95 	struct qedr_device_attr *qattr = &dev->attr;
96 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
97 
98 	srq_attr->srq_limit = srq->srq_limit;
99 	srq_attr->max_wr = qattr->max_srq_wr;
100 	srq_attr->max_sge = qattr->max_sge;
101 
102 	return 0;
103 }
104 
105 int qedr_query_device(struct ib_device *ibdev,
106 		      struct ib_device_attr *attr, struct ib_udata *udata)
107 {
108 	struct qedr_dev *dev = get_qedr_dev(ibdev);
109 	struct qedr_device_attr *qattr = &dev->attr;
110 
111 	if (!dev->rdma_ctx) {
112 		DP_ERR(dev,
113 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
114 		       dev->rdma_ctx);
115 		return -EINVAL;
116 	}
117 
118 	memset(attr, 0, sizeof(*attr));
119 
120 	attr->fw_ver = qattr->fw_ver;
121 	attr->sys_image_guid = qattr->sys_image_guid;
122 	attr->max_mr_size = qattr->max_mr_size;
123 	attr->page_size_cap = qattr->page_size_caps;
124 	attr->vendor_id = qattr->vendor_id;
125 	attr->vendor_part_id = qattr->vendor_part_id;
126 	attr->hw_ver = qattr->hw_ver;
127 	attr->max_qp = qattr->max_qp;
128 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
129 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
130 	    IB_DEVICE_RC_RNR_NAK_GEN |
131 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
132 
133 	attr->max_send_sge = qattr->max_sge;
134 	attr->max_recv_sge = qattr->max_sge;
135 	attr->max_sge_rd = qattr->max_sge;
136 	attr->max_cq = qattr->max_cq;
137 	attr->max_cqe = qattr->max_cqe;
138 	attr->max_mr = qattr->max_mr;
139 	attr->max_mw = qattr->max_mw;
140 	attr->max_pd = qattr->max_pd;
141 	attr->atomic_cap = dev->atomic_cap;
142 	attr->max_fmr = qattr->max_fmr;
143 	attr->max_map_per_fmr = 16;
144 	attr->max_qp_init_rd_atom =
145 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
146 	attr->max_qp_rd_atom =
147 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
148 		attr->max_qp_init_rd_atom);
149 
150 	attr->max_srq = qattr->max_srq;
151 	attr->max_srq_sge = qattr->max_srq_sge;
152 	attr->max_srq_wr = qattr->max_srq_wr;
153 
154 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
155 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
156 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
157 	attr->max_ah = qattr->max_ah;
158 
159 	return 0;
160 }
161 
162 #define QEDR_SPEED_SDR		(1)
163 #define QEDR_SPEED_DDR		(2)
164 #define QEDR_SPEED_QDR		(4)
165 #define QEDR_SPEED_FDR10	(8)
166 #define QEDR_SPEED_FDR		(16)
167 #define QEDR_SPEED_EDR		(32)
168 
169 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
170 					    u8 *ib_width)
171 {
172 	switch (speed) {
173 	case 1000:
174 		*ib_speed = QEDR_SPEED_SDR;
175 		*ib_width = IB_WIDTH_1X;
176 		break;
177 	case 10000:
178 		*ib_speed = QEDR_SPEED_QDR;
179 		*ib_width = IB_WIDTH_1X;
180 		break;
181 
182 	case 20000:
183 		*ib_speed = QEDR_SPEED_DDR;
184 		*ib_width = IB_WIDTH_4X;
185 		break;
186 
187 	case 25000:
188 		*ib_speed = QEDR_SPEED_EDR;
189 		*ib_width = IB_WIDTH_1X;
190 		break;
191 
192 	case 40000:
193 		*ib_speed = QEDR_SPEED_QDR;
194 		*ib_width = IB_WIDTH_4X;
195 		break;
196 
197 	case 50000:
198 		*ib_speed = QEDR_SPEED_QDR;
199 		*ib_width = IB_WIDTH_4X;
200 		break;
201 
202 	case 100000:
203 		*ib_speed = QEDR_SPEED_EDR;
204 		*ib_width = IB_WIDTH_4X;
205 		break;
206 
207 	default:
208 		/* Unsupported */
209 		*ib_speed = QEDR_SPEED_SDR;
210 		*ib_width = IB_WIDTH_1X;
211 	}
212 }
213 
214 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
215 {
216 	struct qedr_dev *dev;
217 	struct qed_rdma_port *rdma_port;
218 
219 	dev = get_qedr_dev(ibdev);
220 
221 	if (!dev->rdma_ctx) {
222 		DP_ERR(dev, "rdma_ctx is NULL\n");
223 		return -EINVAL;
224 	}
225 
226 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
227 
228 	/* *attr is already zeroed by the caller; avoid zeroing it here */
229 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
230 		attr->state = IB_PORT_ACTIVE;
231 		attr->phys_state = 5;
232 	} else {
233 		attr->state = IB_PORT_DOWN;
234 		attr->phys_state = 3;
235 	}
236 	attr->max_mtu = IB_MTU_4096;
237 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
238 	attr->lid = 0;
239 	attr->lmc = 0;
240 	attr->sm_lid = 0;
241 	attr->sm_sl = 0;
242 	attr->ip_gids = true;
243 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
244 		attr->gid_tbl_len = 1;
245 		attr->pkey_tbl_len = 1;
246 	} else {
247 		attr->gid_tbl_len = QEDR_MAX_SGID;
248 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
249 	}
250 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
251 	attr->qkey_viol_cntr = 0;
252 	get_link_speed_and_width(rdma_port->link_speed,
253 				 &attr->active_speed, &attr->active_width);
254 	attr->max_msg_sz = rdma_port->max_msg_size;
255 	attr->max_vl_num = 4;
256 
257 	return 0;
258 }
259 
260 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
261 		     struct ib_port_modify *props)
262 {
263 	return 0;
264 }
265 
266 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
267 			 unsigned long len)
268 {
269 	struct qedr_mm *mm;
270 
271 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
272 	if (!mm)
273 		return -ENOMEM;
274 
275 	mm->key.phy_addr = phy_addr;
276 	/* This function might be called with a length which is not a multiple
277 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
278 	 * forces this granularity by increasing the requested size if needed.
279 	 * When qedr_mmap is called, it will search the list with the updated
280 	 * length as a key. To prevent search failures, the length is rounded up
281 	 * in advance to PAGE_SIZE.
282 	 */
283 	mm->key.len = roundup(len, PAGE_SIZE);
284 	INIT_LIST_HEAD(&mm->entry);
285 
286 	mutex_lock(&uctx->mm_list_lock);
287 	list_add(&mm->entry, &uctx->mm_head);
288 	mutex_unlock(&uctx->mm_list_lock);
289 
290 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
291 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
292 		 (unsigned long long)mm->key.phy_addr,
293 		 (unsigned long)mm->key.len, uctx);
294 
295 	return 0;
296 }
297 
298 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
299 			     unsigned long len)
300 {
301 	bool found = false;
302 	struct qedr_mm *mm;
303 
304 	mutex_lock(&uctx->mm_list_lock);
305 	list_for_each_entry(mm, &uctx->mm_head, entry) {
306 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
307 			continue;
308 
309 		found = true;
310 		break;
311 	}
312 	mutex_unlock(&uctx->mm_list_lock);
313 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
314 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
315 		 (unsigned long long)phy_addr, len, uctx, found);
316 
317 	return found;
318 }
319 
320 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
321 {
322 	struct ib_device *ibdev = uctx->device;
323 	int rc;
324 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
325 	struct qedr_alloc_ucontext_resp uresp = {};
326 	struct qedr_dev *dev = get_qedr_dev(ibdev);
327 	struct qed_rdma_add_user_out_params oparams;
328 
329 	if (!udata)
330 		return -EFAULT;
331 
332 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
333 	if (rc) {
334 		DP_ERR(dev,
335 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
336 		       rc);
337 		return rc;
338 	}
339 
340 	ctx->dpi = oparams.dpi;
341 	ctx->dpi_addr = oparams.dpi_addr;
342 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
343 	ctx->dpi_size = oparams.dpi_size;
344 	INIT_LIST_HEAD(&ctx->mm_head);
345 	mutex_init(&ctx->mm_list_lock);
346 
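	/* Report the DPI (doorbell page) location and the device limits to user
	 * space; db_pa/db_size are what user space later maps via qedr_mmap().
	 */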
347 	uresp.dpm_enabled = dev->user_dpm_enabled;
348 	uresp.wids_enabled = 1;
349 	uresp.wid_count = oparams.wid_count;
350 	uresp.db_pa = ctx->dpi_phys_addr;
351 	uresp.db_size = ctx->dpi_size;
352 	uresp.max_send_wr = dev->attr.max_sqe;
353 	uresp.max_recv_wr = dev->attr.max_rqe;
354 	uresp.max_srq_wr = dev->attr.max_srq_wr;
355 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
356 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
357 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
358 	uresp.max_cqes = QEDR_MAX_CQES;
359 
360 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
361 	if (rc)
362 		return rc;
363 
364 	ctx->dev = dev;
365 
366 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
367 	if (rc)
368 		return rc;
369 
370 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
371 		 &ctx->ibucontext);
372 	return 0;
373 }
374 
375 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
376 {
377 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
378 	struct qedr_mm *mm, *tmp;
379 
380 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
381 		 uctx);
382 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
383 
384 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
385 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
386 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
387 			 mm->key.phy_addr, mm->key.len, uctx);
388 		list_del(&mm->entry);
389 		kfree(mm);
390 	}
391 }
392 
393 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
394 {
395 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
396 	struct qedr_dev *dev = get_qedr_dev(context->device);
397 	unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
398 	unsigned long len = (vma->vm_end - vma->vm_start);
399 	unsigned long dpi_start;
400 
401 	dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
402 
403 	DP_DEBUG(dev, QEDR_MSG_INIT,
404 		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK, vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
405 		 (void *)vma->vm_start, (void *)vma->vm_end,
406 		 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
407 
408 	if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
409 		DP_ERR(dev,
410 		       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
411 		       (void *)vma->vm_start, (void *)vma->vm_end);
412 		return -EINVAL;
413 	}
414 
415 	if (!qedr_search_mmap(ucontext, phys_addr, len)) {
416 		DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
417 		       vma->vm_pgoff);
418 		return -EINVAL;
419 	}
420 
421 	if (phys_addr < dpi_start ||
422 	    ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
423 		DP_ERR(dev,
424 		       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
425 		       (void *)phys_addr, (void *)dpi_start,
426 		       ucontext->dpi_size);
427 		return -EINVAL;
428 	}
429 
430 	if (vma->vm_flags & VM_READ) {
431 		DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
432 		return -EINVAL;
433 	}
434 
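	/* Map the doorbell range write-combined; it is write-only from user
	 * space (the VM_READ check above rejects readable mappings).
	 */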
435 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
436 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
437 				  vma->vm_page_prot);
438 }
439 
440 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
441 {
442 	struct ib_device *ibdev = ibpd->device;
443 	struct qedr_dev *dev = get_qedr_dev(ibdev);
444 	struct qedr_pd *pd = get_qedr_pd(ibpd);
445 	u16 pd_id;
446 	int rc;
447 
448 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
449 		 udata ? "User Lib" : "Kernel");
450 
451 	if (!dev->rdma_ctx) {
452 		DP_ERR(dev, "invalid RDMA context\n");
453 		return -EINVAL;
454 	}
455 
456 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
457 	if (rc)
458 		return rc;
459 
460 	pd->pd_id = pd_id;
461 
462 	if (udata) {
463 		struct qedr_alloc_pd_uresp uresp = {
464 			.pd_id = pd_id,
465 		};
466 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
467 			udata, struct qedr_ucontext, ibucontext);
468 
469 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
470 		if (rc) {
471 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
472 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
473 			return rc;
474 		}
475 
476 		pd->uctx = context;
477 		pd->uctx->pd = pd;
478 	}
479 
480 	return 0;
481 }
482 
483 void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
484 {
485 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
486 	struct qedr_pd *pd = get_qedr_pd(ibpd);
487 
488 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
489 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
490 }
491 
492 static void qedr_free_pbl(struct qedr_dev *dev,
493 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
494 {
495 	struct pci_dev *pdev = dev->pdev;
496 	int i;
497 
498 	for (i = 0; i < pbl_info->num_pbls; i++) {
499 		if (!pbl[i].va)
500 			continue;
501 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
502 				  pbl[i].va, pbl[i].pa);
503 	}
504 
505 	kfree(pbl);
506 }
507 
508 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
509 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
510 
511 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
512 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
513 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
514 
515 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
516 					   struct qedr_pbl_info *pbl_info,
517 					   gfp_t flags)
518 {
519 	struct pci_dev *pdev = dev->pdev;
520 	struct qedr_pbl *pbl_table;
521 	dma_addr_t *pbl_main_tbl;
522 	dma_addr_t pa;
523 	void *va;
524 	int i;
525 
526 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
527 	if (!pbl_table)
528 		return ERR_PTR(-ENOMEM);
529 
530 	for (i = 0; i < pbl_info->num_pbls; i++) {
531 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
532 					flags);
533 		if (!va)
534 			goto err;
535 
536 		pbl_table[i].va = va;
537 		pbl_table[i].pa = pa;
538 	}
539 
540 	/* Two-layer PBLs: if we have more than one PBL, initialize the first
541 	 * one with physical pointers to all of the rest.
542 	 */
543 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
544 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
545 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
546 
547 	return pbl_table;
548 
549 err:
550 	for (i--; i >= 0; i--)
551 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
552 				  pbl_table[i].va, pbl_table[i].pa);
553 
554 	qedr_free_pbl(dev, pbl_info, pbl_table);
555 
556 	return ERR_PTR(-ENOMEM);
557 }
558 
559 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
560 				struct qedr_pbl_info *pbl_info,
561 				u32 num_pbes, int two_layer_capable)
562 {
563 	u32 pbl_capacity;
564 	u32 pbl_size;
565 	u32 num_pbls;
566 
567 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
568 		if (num_pbes > MAX_PBES_TWO_LAYER) {
569 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
570 			       num_pbes);
571 			return -EINVAL;
572 		}
573 
574 		/* calculate required pbl page size */
575 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
576 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
577 			       NUM_PBES_ON_PAGE(pbl_size);
578 
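		/* Example: with 4K PBL pages each page holds 4096 / 8 = 512
		 * PBEs, so a two-layer table covers up to 512 * 512 = 262,144
		 * pages; the page size keeps doubling below until the squared
		 * capacity covers num_pbes.
		 */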
579 		while (pbl_capacity < num_pbes) {
580 			pbl_size *= 2;
581 			pbl_capacity = pbl_size / sizeof(u64);
582 			pbl_capacity = pbl_capacity * pbl_capacity;
583 		}
584 
585 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
586 	num_pbls++;	/* One extra for layer 0, which points to the other PBLs */
587 		pbl_info->two_layered = true;
588 	} else {
589 		/* One layered PBL */
590 		num_pbls = 1;
591 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
592 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
593 		pbl_info->two_layered = false;
594 	}
595 
596 	pbl_info->num_pbls = num_pbls;
597 	pbl_info->pbl_size = pbl_size;
598 	pbl_info->num_pbes = num_pbes;
599 
600 	DP_DEBUG(dev, QEDR_MSG_MR,
601 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
602 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
603 
604 	return 0;
605 }
606 
607 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
608 			       struct qedr_pbl *pbl,
609 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
610 {
611 	int pbe_cnt, total_num_pbes = 0;
612 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
613 	struct qedr_pbl *pbl_tbl;
614 	struct sg_dma_page_iter sg_iter;
615 	struct regpair *pbe;
616 	u64 pg_addr;
617 
618 	if (!pbl_info->num_pbes)
619 		return;
620 
621 	/* If we have a two-layered PBL, the first PBL points to the rest of
622 	 * the PBLs and the first entry lies in the second PBL of the table.
623 	 */
624 	if (pbl_info->two_layered)
625 		pbl_tbl = &pbl[1];
626 	else
627 		pbl_tbl = pbl;
628 
629 	pbe = (struct regpair *)pbl_tbl->va;
630 	if (!pbe) {
631 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
632 		return;
633 	}
634 
635 	pbe_cnt = 0;
636 
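	/* One kernel (PAGE_SIZE) page spans one or more FW pages whenever
	 * pg_shift is smaller than PAGE_SHIFT, so every umem page produces
	 * fw_pg_per_umem_pg PBEs.
	 */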
637 	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
638 
639 	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
640 		pg_addr = sg_page_iter_dma_address(&sg_iter);
641 		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
642 			pbe->lo = cpu_to_le32(pg_addr);
643 			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
644 
645 			pg_addr += BIT(pg_shift);
646 			pbe_cnt++;
647 			total_num_pbes++;
648 			pbe++;
649 
650 			if (total_num_pbes == pbl_info->num_pbes)
651 				return;
652 
653 			/* If the given PBL is full of PBEs, move to the
654 			 * next PBL.
655 			 */
656 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
657 				pbl_tbl++;
658 				pbe = (struct regpair *)pbl_tbl->va;
659 				pbe_cnt = 0;
660 			}
661 
662 			fw_pg_cnt++;
663 		}
664 	}
665 }
666 
667 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
668 			      struct qedr_cq *cq, struct ib_udata *udata)
669 {
670 	struct qedr_create_cq_uresp uresp;
671 	int rc;
672 
673 	memset(&uresp, 0, sizeof(uresp));
674 
675 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
676 	uresp.icid = cq->icid;
677 
678 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
679 	if (rc)
680 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
681 
682 	return rc;
683 }
684 
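/* Advance latest_cqe; the toggle mask flips every time the CQE chain wraps so
 * the polling code can tell newly written CQEs from stale ones.
 */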
685 static void consume_cqe(struct qedr_cq *cq)
686 {
687 	if (cq->latest_cqe == cq->toggle_cqe)
688 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
689 
690 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
691 }
692 
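/* Round the requested CQE count up so the queue fills whole pages. For
 * example, assuming a 32-byte CQE and a 4K page, a request for 100 entries
 * allocates (100 + 1) * 32 = 3232 bytes, which is aligned up to 4096 and
 * yields 128 usable entries.
 */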
693 static inline int qedr_align_cq_entries(int entries)
694 {
695 	u64 size, aligned_size;
696 
697 	/* We allocate an extra entry that we don't report to the FW. */
698 	size = (entries + 1) * QEDR_CQE_SIZE;
699 	aligned_size = ALIGN(size, PAGE_SIZE);
700 
701 	return aligned_size / QEDR_CQE_SIZE;
702 }
703 
704 static inline int qedr_init_user_queue(struct ib_udata *udata,
705 				       struct qedr_dev *dev,
706 				       struct qedr_userq *q, u64 buf_addr,
707 				       size_t buf_len, int access, int dmasync,
708 				       int alloc_and_init)
709 {
710 	u32 fw_pages;
711 	int rc;
712 
713 	q->buf_addr = buf_addr;
714 	q->buf_len = buf_len;
715 	q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync);
716 	if (IS_ERR(q->umem)) {
717 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
718 		       PTR_ERR(q->umem));
719 		return PTR_ERR(q->umem);
720 	}
721 
722 	fw_pages = ib_umem_page_count(q->umem) <<
723 	    (PAGE_SHIFT - FW_PAGE_SHIFT);
724 
725 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
726 	if (rc)
727 		goto err0;
728 
729 	if (alloc_and_init) {
730 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
731 		if (IS_ERR(q->pbl_tbl)) {
732 			rc = PTR_ERR(q->pbl_tbl);
733 			goto err0;
734 		}
735 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
736 				   FW_PAGE_SHIFT);
737 	} else {
738 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
739 		if (!q->pbl_tbl) {
740 			rc = -ENOMEM;
741 			goto err0;
742 		}
743 	}
744 
745 	return 0;
746 
747 err0:
748 	ib_umem_release(q->umem);
749 	q->umem = NULL;
750 
751 	return rc;
752 }
753 
754 static inline void qedr_init_cq_params(struct qedr_cq *cq,
755 				       struct qedr_ucontext *ctx,
756 				       struct qedr_dev *dev, int vector,
757 				       int chain_entries, int page_cnt,
758 				       u64 pbl_ptr,
759 				       struct qed_rdma_create_cq_in_params
760 				       *params)
761 {
762 	memset(params, 0, sizeof(*params));
763 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
764 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
765 	params->cnq_id = vector;
766 	params->cq_size = chain_entries - 1;
767 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
768 	params->pbl_num_pages = page_cnt;
769 	params->pbl_ptr = pbl_ptr;
770 	params->pbl_two_level = 0;
771 }
772 
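/* Ring the CQ doorbell: report the consumer index to the HW and, when called
 * from qedr_arm_cq(), also apply the aggregation flags that arm the CQ for
 * solicited and/or unsolicited completion notifications.
 */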
773 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
774 {
775 	cq->db.data.agg_flags = flags;
776 	cq->db.data.value = cpu_to_le32(cons);
777 	writeq(cq->db.raw, cq->db_addr);
778 }
779 
780 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
781 {
782 	struct qedr_cq *cq = get_qedr_cq(ibcq);
783 	unsigned long sflags;
784 	struct qedr_dev *dev;
785 
786 	dev = get_qedr_dev(ibcq->device);
787 
788 	if (cq->destroyed) {
789 		DP_ERR(dev,
790 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
791 		       cq, cq->icid);
792 		return -EINVAL;
793 	}
794 
795 
796 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
797 		return 0;
798 
799 	spin_lock_irqsave(&cq->cq_lock, sflags);
800 
801 	cq->arm_flags = 0;
802 
803 	if (flags & IB_CQ_SOLICITED)
804 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
805 
806 	if (flags & IB_CQ_NEXT_COMP)
807 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
808 
809 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
810 
811 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
812 
813 	return 0;
814 }
815 
816 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
817 			     const struct ib_cq_init_attr *attr,
818 			     struct ib_udata *udata)
819 {
820 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
821 		udata, struct qedr_ucontext, ibucontext);
822 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
823 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
824 	struct qedr_dev *dev = get_qedr_dev(ibdev);
825 	struct qed_rdma_create_cq_in_params params;
826 	struct qedr_create_cq_ureq ureq;
827 	int vector = attr->comp_vector;
828 	int entries = attr->cqe;
829 	struct qedr_cq *cq;
830 	int chain_entries;
831 	int page_cnt;
832 	u64 pbl_ptr;
833 	u16 icid;
834 	int rc;
835 
836 	DP_DEBUG(dev, QEDR_MSG_INIT,
837 		 "create_cq: called from %s. entries=%d, vector=%d\n",
838 		 udata ? "User Lib" : "Kernel", entries, vector);
839 
840 	if (entries > QEDR_MAX_CQES) {
841 		DP_ERR(dev,
842 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
843 		       entries, QEDR_MAX_CQES);
844 		return ERR_PTR(-EINVAL);
845 	}
846 
847 	chain_entries = qedr_align_cq_entries(entries);
848 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
849 
850 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
851 	if (!cq)
852 		return ERR_PTR(-ENOMEM);
853 
854 	if (udata) {
855 		memset(&ureq, 0, sizeof(ureq));
856 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
857 			DP_ERR(dev,
858 			       "create cq: problem copying data from user space\n");
859 			goto err0;
860 		}
861 
862 		if (!ureq.len) {
863 			DP_ERR(dev,
864 			       "create cq: cannot create a cq with 0 entries\n");
865 			goto err0;
866 		}
867 
868 		cq->cq_type = QEDR_CQ_TYPE_USER;
869 
870 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
871 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1,
872 					  1);
873 		if (rc)
874 			goto err0;
875 
876 		pbl_ptr = cq->q.pbl_tbl->pa;
877 		page_cnt = cq->q.pbl_info.num_pbes;
878 
879 		cq->ibcq.cqe = chain_entries;
880 	} else {
881 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
882 
883 		rc = dev->ops->common->chain_alloc(dev->cdev,
884 						   QED_CHAIN_USE_TO_CONSUME,
885 						   QED_CHAIN_MODE_PBL,
886 						   QED_CHAIN_CNT_TYPE_U32,
887 						   chain_entries,
888 						   sizeof(union rdma_cqe),
889 						   &cq->pbl, NULL);
890 		if (rc)
891 			goto err1;
892 
893 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
894 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
895 		cq->ibcq.cqe = cq->pbl.capacity;
896 	}
897 
898 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
899 			    pbl_ptr, &params);
900 
901 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
902 	if (rc)
903 		goto err2;
904 
905 	cq->icid = icid;
906 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
907 	spin_lock_init(&cq->cq_lock);
908 
909 	if (udata) {
910 		rc = qedr_copy_cq_uresp(dev, cq, udata);
911 		if (rc)
912 			goto err3;
913 	} else {
914 		/* Generate doorbell address. */
915 		cq->db_addr = dev->db_addr +
916 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
917 		cq->db.data.icid = cq->icid;
918 		cq->db.data.params = DB_AGG_CMD_SET <<
919 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
920 
921 		/* Point to the very last element; once we pass it, we toggle. */
922 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
923 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
924 		cq->latest_cqe = NULL;
925 		consume_cqe(cq);
926 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
927 	}
928 
929 	DP_DEBUG(dev, QEDR_MSG_CQ,
930 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
931 		 cq->icid, cq, params.cq_size);
932 
933 	return &cq->ibcq;
934 
935 err3:
936 	destroy_iparams.icid = cq->icid;
937 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
938 				  &destroy_oparams);
939 err2:
940 	if (udata)
941 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
942 	else
943 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
944 err1:
945 	if (udata)
946 		ib_umem_release(cq->q.umem);
947 err0:
948 	kfree(cq);
949 	return ERR_PTR(-EINVAL);
950 }
951 
952 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
953 {
954 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
955 	struct qedr_cq *cq = get_qedr_cq(ibcq);
956 
957 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
958 
959 	return 0;
960 }
961 
962 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
963 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
964 
965 int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
966 {
967 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
968 	struct qed_rdma_destroy_cq_out_params oparams;
969 	struct qed_rdma_destroy_cq_in_params iparams;
970 	struct qedr_cq *cq = get_qedr_cq(ibcq);
971 	int iter;
972 	int rc;
973 
974 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
975 
976 	cq->destroyed = 1;
977 
978 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
979 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
980 		goto done;
981 
982 	iparams.icid = cq->icid;
983 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
984 	if (rc)
985 		return rc;
986 
987 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
988 
989 	if (udata) {
990 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
991 		ib_umem_release(cq->q.umem);
992 	}
993 
994 	/* We don't want the IRQ handler to handle a non-existing CQ, so we
995 	 * wait until all CNQ interrupts, if any, are received. This will always
996 	 * happen, and will always happen very fast. If not, then a serious error
997 	 * has occurred. That is why we can use a long delay.
998 	 * We spin for a short time so we don't lose time on context switching
999 	 * in case all the completions are handled in that span. Otherwise
1000 	 * we sleep for a while and check again. Since the CNQ may be
1001 	 * associated with (only) the current CPU, we use msleep to allow the
1002 	 * current CPU to be freed.
1003 	 * The CNQ notification count is increased in qedr_irq_handler().
1004 	 */
1005 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1006 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1007 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1008 		iter--;
1009 	}
1010 
1011 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1012 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1013 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1014 		iter--;
1015 	}
1016 
1017 	if (oparams.num_cq_notif != cq->cnq_notif)
1018 		goto err;
1019 
1020 	/* Note that we don't need to have explicit code to wait for the
1021 	 * completion of the event handler because it is invoked from the EQ.
1022 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1023 	 * be certain that there's no event handler in process.
1024 	 */
1025 done:
1026 	cq->sig = ~cq->sig;
1027 
1028 	kfree(cq);
1029 
1030 	return 0;
1031 
1032 err:
1033 	DP_ERR(dev,
1034 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1035 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1036 
1037 	return -EINVAL;
1038 }
1039 
1040 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1041 					  struct ib_qp_attr *attr,
1042 					  int attr_mask,
1043 					  struct qed_rdma_modify_qp_in_params
1044 					  *qp_params)
1045 {
1046 	const struct ib_gid_attr *gid_attr;
1047 	enum rdma_network_type nw_type;
1048 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1049 	u32 ipv4_addr;
1050 	int ret;
1051 	int i;
1052 
1053 	gid_attr = grh->sgid_attr;
1054 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1055 	if (ret)
1056 		return ret;
1057 
1058 	nw_type = rdma_gid_attr_network_type(gid_attr);
1059 	switch (nw_type) {
1060 	case RDMA_NETWORK_IPV6:
1061 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1062 		       sizeof(qp_params->sgid));
1063 		memcpy(&qp_params->dgid.bytes[0],
1064 		       &grh->dgid,
1065 		       sizeof(qp_params->dgid));
1066 		qp_params->roce_mode = ROCE_V2_IPV6;
1067 		SET_FIELD(qp_params->modify_flags,
1068 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1069 		break;
1070 	case RDMA_NETWORK_IB:
1071 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1072 		       sizeof(qp_params->sgid));
1073 		memcpy(&qp_params->dgid.bytes[0],
1074 		       &grh->dgid,
1075 		       sizeof(qp_params->dgid));
1076 		qp_params->roce_mode = ROCE_V1;
1077 		break;
1078 	case RDMA_NETWORK_IPV4:
1079 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1080 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1081 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1082 		qp_params->sgid.ipv4_addr = ipv4_addr;
1083 		ipv4_addr =
1084 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1085 		qp_params->dgid.ipv4_addr = ipv4_addr;
1086 		SET_FIELD(qp_params->modify_flags,
1087 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1088 		qp_params->roce_mode = ROCE_V2_IPV4;
1089 		break;
1090 	}
1091 
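	/* The GID bytes are in network byte order; convert each dword to host
	 * order before handing the parameters to the qed layer.
	 */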
1092 	for (i = 0; i < 4; i++) {
1093 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1094 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1095 	}
1096 
1097 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1098 		qp_params->vlan_id = 0;
1099 
1100 	return 0;
1101 }
1102 
1103 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1104 			       struct ib_qp_init_attr *attrs,
1105 			       struct ib_udata *udata)
1106 {
1107 	struct qedr_device_attr *qattr = &dev->attr;
1108 
1109 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1110 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1111 		DP_DEBUG(dev, QEDR_MSG_QP,
1112 			 "create qp: unsupported qp type=0x%x requested\n",
1113 			 attrs->qp_type);
1114 		return -EINVAL;
1115 	}
1116 
1117 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1118 		DP_ERR(dev,
1119 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1120 		       attrs->cap.max_send_wr, qattr->max_sqe);
1121 		return -EINVAL;
1122 	}
1123 
1124 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1125 		DP_ERR(dev,
1126 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1127 		       attrs->cap.max_inline_data, qattr->max_inline);
1128 		return -EINVAL;
1129 	}
1130 
1131 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1132 		DP_ERR(dev,
1133 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1134 		       attrs->cap.max_send_sge, qattr->max_sge);
1135 		return -EINVAL;
1136 	}
1137 
1138 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1139 		DP_ERR(dev,
1140 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1141 		       attrs->cap.max_recv_sge, qattr->max_sge);
1142 		return -EINVAL;
1143 	}
1144 
1145 	/* Unprivileged user space cannot create special QP */
1146 	if (udata && attrs->qp_type == IB_QPT_GSI) {
1147 		DP_ERR(dev,
1148 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1149 		       attrs->qp_type);
1150 		return -EINVAL;
1151 	}
1152 
1153 	return 0;
1154 }
1155 
1156 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1157 			       struct qedr_srq *srq, struct ib_udata *udata)
1158 {
1159 	struct qedr_create_srq_uresp uresp = {};
1160 	int rc;
1161 
1162 	uresp.srq_id = srq->srq_id;
1163 
1164 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1165 	if (rc)
1166 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1167 
1168 	return rc;
1169 }
1170 
1171 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1172 			       struct qedr_create_qp_uresp *uresp,
1173 			       struct qedr_qp *qp)
1174 {
1175 	/* iWARP requires two doorbells per RQ. */
1176 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1177 		uresp->rq_db_offset =
1178 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1179 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1180 	} else {
1181 		uresp->rq_db_offset =
1182 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1183 	}
1184 
1185 	uresp->rq_icid = qp->icid;
1186 }
1187 
1188 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1189 			       struct qedr_create_qp_uresp *uresp,
1190 			       struct qedr_qp *qp)
1191 {
1192 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1193 
1194 	/* iWARP uses the same cid for rq and sq */
1195 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1196 		uresp->sq_icid = qp->icid;
1197 	else
1198 		uresp->sq_icid = qp->icid + 1;
1199 }
1200 
1201 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1202 			      struct qedr_qp *qp, struct ib_udata *udata)
1203 {
1204 	struct qedr_create_qp_uresp uresp;
1205 	int rc;
1206 
1207 	memset(&uresp, 0, sizeof(uresp));
1208 	qedr_copy_sq_uresp(dev, &uresp, qp);
1209 	qedr_copy_rq_uresp(dev, &uresp, qp);
1210 
1211 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1212 	uresp.qp_id = qp->qp_id;
1213 
1214 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1215 	if (rc)
1216 		DP_ERR(dev,
1217 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1218 		       qp->icid);
1219 
1220 	return rc;
1221 }
1222 
1223 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1224 				      struct qedr_qp *qp,
1225 				      struct qedr_pd *pd,
1226 				      struct ib_qp_init_attr *attrs)
1227 {
1228 	spin_lock_init(&qp->q_lock);
1229 	atomic_set(&qp->refcnt, 1);
1230 	qp->pd = pd;
1231 	qp->qp_type = attrs->qp_type;
1232 	qp->max_inline_data = attrs->cap.max_inline_data;
1233 	qp->sq.max_sges = attrs->cap.max_send_sge;
1234 	qp->state = QED_ROCE_QP_STATE_RESET;
1235 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1236 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1237 	qp->dev = dev;
1238 
1239 	if (attrs->srq) {
1240 		qp->srq = get_qedr_srq(attrs->srq);
1241 	} else {
1242 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1243 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1244 		DP_DEBUG(dev, QEDR_MSG_QP,
1245 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1246 			 qp->rq.max_sges, qp->rq_cq->icid);
1247 	}
1248 
1249 	DP_DEBUG(dev, QEDR_MSG_QP,
1250 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1251 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1252 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1253 	DP_DEBUG(dev, QEDR_MSG_QP,
1254 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1255 		 qp->sq.max_sges, qp->sq_cq->icid);
1256 }
1257 
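/* For RoCE the SQ and RQ use consecutive internal CIDs: the RQ doorbell is
 * keyed by qp->icid and the SQ doorbell by qp->icid + 1 (iWARP uses the same
 * icid for both, see qedr_set_iwarp_db_info()).
 */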
1258 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1259 {
1260 	qp->sq.db = dev->db_addr +
1261 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1262 	qp->sq.db_data.data.icid = qp->icid + 1;
1263 	if (!qp->srq) {
1264 		qp->rq.db = dev->db_addr +
1265 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1266 		qp->rq.db_data.data.icid = qp->icid;
1267 	}
1268 }
1269 
1270 static int qedr_check_srq_params(struct qedr_dev *dev,
1271 				 struct ib_srq_init_attr *attrs,
1272 				 struct ib_udata *udata)
1273 {
1274 	struct qedr_device_attr *qattr = &dev->attr;
1275 
1276 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1277 		DP_ERR(dev,
1278 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1279 		       attrs->attr.max_wr, qattr->max_srq_wr);
1280 		return -EINVAL;
1281 	}
1282 
1283 	if (attrs->attr.max_sge > qattr->max_sge) {
1284 		DP_ERR(dev,
1285 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1286 		       attrs->attr.max_sge, qattr->max_sge);
1287 		return -EINVAL;
1288 	}
1289 
1290 	return 0;
1291 }
1292 
1293 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1294 {
1295 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1296 	ib_umem_release(srq->usrq.umem);
1297 	ib_umem_release(srq->prod_umem);
1298 }
1299 
1300 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1301 {
1302 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1303 	struct qedr_dev *dev = srq->dev;
1304 
1305 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1306 
1307 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1308 			  hw_srq->virt_prod_pair_addr,
1309 			  hw_srq->phy_prod_pair_addr);
1310 }
1311 
1312 static int qedr_init_srq_user_params(struct ib_udata *udata,
1313 				     struct qedr_srq *srq,
1314 				     struct qedr_create_srq_ureq *ureq,
1315 				     int access, int dmasync)
1316 {
1317 	struct scatterlist *sg;
1318 	int rc;
1319 
1320 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1321 				  ureq->srq_len, access, dmasync, 1);
1322 	if (rc)
1323 		return rc;
1324 
1325 	srq->prod_umem =
1326 		ib_umem_get(udata, ureq->prod_pair_addr,
1327 			    sizeof(struct rdma_srq_producers), access, dmasync);
1328 	if (IS_ERR(srq->prod_umem)) {
1329 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1330 		ib_umem_release(srq->usrq.umem);
1331 		DP_ERR(srq->dev,
1332 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1333 		       PTR_ERR(srq->prod_umem));
1334 		return PTR_ERR(srq->prod_umem);
1335 	}
1336 
1337 	sg = srq->prod_umem->sg_head.sgl;
1338 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1339 
1340 	return 0;
1341 }
1342 
1343 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1344 					struct qedr_dev *dev,
1345 					struct ib_srq_init_attr *init_attr)
1346 {
1347 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1348 	dma_addr_t phy_prod_pair_addr;
1349 	u32 num_elems;
1350 	void *va;
1351 	int rc;
1352 
1353 	va = dma_alloc_coherent(&dev->pdev->dev,
1354 				sizeof(struct rdma_srq_producers),
1355 				&phy_prod_pair_addr, GFP_KERNEL);
1356 	if (!va) {
1357 		DP_ERR(dev,
1358 		       "create srq: failed to allocate dma memory for producer\n");
1359 		return -ENOMEM;
1360 	}
1361 
1362 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1363 	hw_srq->virt_prod_pair_addr = va;
1364 
1365 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1366 	rc = dev->ops->common->chain_alloc(dev->cdev,
1367 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1368 					   QED_CHAIN_MODE_PBL,
1369 					   QED_CHAIN_CNT_TYPE_U32,
1370 					   num_elems,
1371 					   QEDR_SRQ_WQE_ELEM_SIZE,
1372 					   &hw_srq->pbl, NULL);
1373 	if (rc)
1374 		goto err0;
1375 
1376 	hw_srq->num_elems = num_elems;
1377 
1378 	return 0;
1379 
1380 err0:
1381 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1382 			  va, phy_prod_pair_addr);
1383 	return rc;
1384 }
1385 
1386 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1387 		    struct ib_udata *udata)
1388 {
1389 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1390 	struct qed_rdma_create_srq_in_params in_params = {};
1391 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1392 	struct qed_rdma_create_srq_out_params out_params;
1393 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1394 	struct qedr_create_srq_ureq ureq = {};
1395 	u64 pbl_base_addr, phy_prod_pair_addr;
1396 	struct qedr_srq_hwq_info *hw_srq;
1397 	u32 page_cnt, page_size;
1398 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1399 	int rc = 0;
1400 
1401 	DP_DEBUG(dev, QEDR_MSG_QP,
1402 		 "create SRQ called from %s (pd %p)\n",
1403 		 (udata) ? "User lib" : "kernel", pd);
1404 
1405 	rc = qedr_check_srq_params(dev, init_attr, udata);
1406 	if (rc)
1407 		return -EINVAL;
1408 
1409 	srq->dev = dev;
1410 	hw_srq = &srq->hw_srq;
1411 	spin_lock_init(&srq->lock);
1412 
1413 	hw_srq->max_wr = init_attr->attr.max_wr;
1414 	hw_srq->max_sges = init_attr->attr.max_sge;
1415 
1416 	if (udata) {
1417 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
1418 			DP_ERR(dev,
1419 			       "create srq: problem copying data from user space\n");
1420 			goto err0;
1421 		}
1422 
1423 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0);
1424 		if (rc)
1425 			goto err0;
1426 
1427 		page_cnt = srq->usrq.pbl_info.num_pbes;
1428 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1429 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1430 		page_size = PAGE_SIZE;
1431 	} else {
1432 		struct qed_chain *pbl;
1433 
1434 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1435 		if (rc)
1436 			goto err0;
1437 
1438 		pbl = &hw_srq->pbl;
1439 		page_cnt = qed_chain_get_page_cnt(pbl);
1440 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1441 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1442 		page_size = QED_CHAIN_PAGE_SIZE;
1443 	}
1444 
1445 	in_params.pd_id = pd->pd_id;
1446 	in_params.pbl_base_addr = pbl_base_addr;
1447 	in_params.prod_pair_addr = phy_prod_pair_addr;
1448 	in_params.num_pages = page_cnt;
1449 	in_params.page_size = page_size;
1450 
1451 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1452 	if (rc)
1453 		goto err1;
1454 
1455 	srq->srq_id = out_params.srq_id;
1456 
1457 	if (udata) {
1458 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1459 		if (rc)
1460 			goto err2;
1461 	}
1462 
1463 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1464 	if (rc)
1465 		goto err2;
1466 
1467 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1468 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1469 	return 0;
1470 
1471 err2:
1472 	destroy_in_params.srq_id = srq->srq_id;
1473 
1474 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1475 err1:
1476 	if (udata)
1477 		qedr_free_srq_user_params(srq);
1478 	else
1479 		qedr_free_srq_kernel_params(srq);
1480 err0:
1481 	return -EFAULT;
1482 }
1483 
1484 void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1485 {
1486 	struct qed_rdma_destroy_srq_in_params in_params = {};
1487 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1488 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1489 
1490 	xa_erase_irq(&dev->srqs, srq->srq_id);
1491 	in_params.srq_id = srq->srq_id;
1492 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1493 
1494 	if (ibsrq->uobject)
1495 		qedr_free_srq_user_params(srq);
1496 	else
1497 		qedr_free_srq_kernel_params(srq);
1498 
1499 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1500 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1501 		 srq->srq_id);
1502 }
1503 
1504 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1505 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1506 {
1507 	struct qed_rdma_modify_srq_in_params in_params = {};
1508 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1509 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1510 	int rc;
1511 
1512 	if (attr_mask & IB_SRQ_MAX_WR) {
1513 		DP_ERR(dev,
1514 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1515 		       attr_mask, srq);
1516 		return -EINVAL;
1517 	}
1518 
1519 	if (attr_mask & IB_SRQ_LIMIT) {
1520 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1521 			DP_ERR(dev,
1522 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1523 			       attr->srq_limit, srq->hw_srq.max_wr);
1524 			return -EINVAL;
1525 		}
1526 
1527 		in_params.srq_id = srq->srq_id;
1528 		in_params.wqe_limit = attr->srq_limit;
1529 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1530 		if (rc)
1531 			return rc;
1532 	}
1533 
1534 	srq->srq_limit = attr->srq_limit;
1535 
1536 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1537 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1538 
1539 	return 0;
1540 }
1541 
1542 static inline void
1543 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1544 			      struct qedr_pd *pd,
1545 			      struct qedr_qp *qp,
1546 			      struct ib_qp_init_attr *attrs,
1547 			      bool fmr_and_reserved_lkey,
1548 			      struct qed_rdma_create_qp_in_params *params)
1549 {
1550 	/* QP handle to be written in an async event */
1551 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1552 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1553 
1554 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1555 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1556 	params->pd = pd->pd_id;
1557 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1558 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1559 	params->stats_queue = 0;
1560 	params->srq_id = 0;
1561 	params->use_srq = false;
1562 
1563 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1564 	if (qp->srq) {
1565 		params->srq_id = qp->srq->srq_id;
1566 		params->use_srq = true;
1567 	}
1571 }
1572 
1573 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1574 {
1575 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1576 		 "qp=%p. "
1577 		 "sq_addr=0x%llx, "
1578 		 "sq_len=%zd, "
1579 		 "rq_addr=0x%llx, "
1580 		 "rq_len=%zd"
1581 		 "\n",
1582 		 qp,
1583 		 qp->usq.buf_addr,
1584 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1585 }
1586 
1587 static inline void
1588 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1589 			    struct qedr_qp *qp,
1590 			    struct qed_rdma_create_qp_out_params *out_params)
1591 {
1592 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1593 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1594 
1595 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1596 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1597 	if (!qp->srq) {
1598 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1599 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1600 	}
1601 
1602 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1603 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1604 }
1605 
1606 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1607 {
1608 	if (qp->usq.umem)
1609 		ib_umem_release(qp->usq.umem);
1610 	qp->usq.umem = NULL;
1611 
1612 	if (qp->urq.umem)
1613 		ib_umem_release(qp->urq.umem);
1614 	qp->urq.umem = NULL;
1615 }
1616 
1617 static int qedr_create_user_qp(struct qedr_dev *dev,
1618 			       struct qedr_qp *qp,
1619 			       struct ib_pd *ibpd,
1620 			       struct ib_udata *udata,
1621 			       struct ib_qp_init_attr *attrs)
1622 {
1623 	struct qed_rdma_create_qp_in_params in_params;
1624 	struct qed_rdma_create_qp_out_params out_params;
1625 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1626 	struct qedr_create_qp_ureq ureq;
1627 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1628 	int rc = -EINVAL;
1629 
1630 	memset(&ureq, 0, sizeof(ureq));
1631 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1632 	if (rc) {
1633 		DP_ERR(dev, "Problem copying data from user space\n");
1634 		return rc;
1635 	}
1636 
1637 	/* SQ - read access only (0), dma sync not required (0) */
1638 	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1639 				  ureq.sq_len, 0, 0, alloc_and_init);
1640 	if (rc)
1641 		return rc;
1642 
1643 	if (!qp->srq) {
1644 		/* RQ - read access only (0), dma sync not required (0) */
1645 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1646 					  ureq.rq_len, 0, 0, alloc_and_init);
1647 		if (rc)
1648 			return rc;
1649 	}
1650 
1651 	memset(&in_params, 0, sizeof(in_params));
1652 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1653 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1654 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1655 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1656 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1657 	if (!qp->srq) {
1658 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1659 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1660 	}
1661 
1662 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1663 					      &in_params, &out_params);
1664 
1665 	if (!qp->qed_qp) {
1666 		rc = -ENOMEM;
1667 		goto err1;
1668 	}
1669 
1670 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1671 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1672 
1673 	qp->qp_id = out_params.qp_id;
1674 	qp->icid = out_params.icid;
1675 
1676 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1677 	if (rc)
1678 		goto err;
1679 
1680 	qedr_qp_user_print(dev, qp);
1681 
1682 	return 0;
1683 err:
1684 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1685 	if (rc)
1686 		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1687 
1688 err1:
1689 	qedr_cleanup_user(dev, qp);
1690 	return rc;
1691 }
1692 
1693 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1694 {
1695 	qp->sq.db = dev->db_addr +
1696 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1697 	qp->sq.db_data.data.icid = qp->icid;
1698 
1699 	qp->rq.db = dev->db_addr +
1700 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1701 	qp->rq.db_data.data.icid = qp->icid;
1702 	qp->rq.iwarp_db2 = dev->db_addr +
1703 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1704 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1705 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1706 }
1707 
1708 static int
1709 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1710 			   struct qedr_qp *qp,
1711 			   struct qed_rdma_create_qp_in_params *in_params,
1712 			   u32 n_sq_elems, u32 n_rq_elems)
1713 {
1714 	struct qed_rdma_create_qp_out_params out_params;
1715 	int rc;
1716 
1717 	rc = dev->ops->common->chain_alloc(dev->cdev,
1718 					   QED_CHAIN_USE_TO_PRODUCE,
1719 					   QED_CHAIN_MODE_PBL,
1720 					   QED_CHAIN_CNT_TYPE_U32,
1721 					   n_sq_elems,
1722 					   QEDR_SQE_ELEMENT_SIZE,
1723 					   &qp->sq.pbl, NULL);
1724 
1725 	if (rc)
1726 		return rc;
1727 
1728 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1729 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1730 
1731 	rc = dev->ops->common->chain_alloc(dev->cdev,
1732 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1733 					   QED_CHAIN_MODE_PBL,
1734 					   QED_CHAIN_CNT_TYPE_U32,
1735 					   n_rq_elems,
1736 					   QEDR_RQE_ELEMENT_SIZE,
1737 					   &qp->rq.pbl, NULL);
1738 	if (rc)
1739 		return rc;
1740 
1741 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1742 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1743 
1744 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1745 					      in_params, &out_params);
1746 
1747 	if (!qp->qed_qp)
1748 		return -EINVAL;
1749 
1750 	qp->qp_id = out_params.qp_id;
1751 	qp->icid = out_params.icid;
1752 
1753 	qedr_set_roce_db_info(dev, qp);
1754 	return rc;
1755 }
1756 
1757 static int
1758 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1759 			    struct qedr_qp *qp,
1760 			    struct qed_rdma_create_qp_in_params *in_params,
1761 			    u32 n_sq_elems, u32 n_rq_elems)
1762 {
1763 	struct qed_rdma_create_qp_out_params out_params;
1764 	struct qed_chain_ext_pbl ext_pbl;
1765 	int rc;
1766 
1767 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1768 						     QEDR_SQE_ELEMENT_SIZE,
1769 						     QED_CHAIN_MODE_PBL);
1770 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1771 						     QEDR_RQE_ELEMENT_SIZE,
1772 						     QED_CHAIN_MODE_PBL);
1773 
1774 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1775 					      in_params, &out_params);
1776 
1777 	if (!qp->qed_qp)
1778 		return -EINVAL;
1779 
1780 	/* Now we allocate the chain */
1781 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1782 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1783 
1784 	rc = dev->ops->common->chain_alloc(dev->cdev,
1785 					   QED_CHAIN_USE_TO_PRODUCE,
1786 					   QED_CHAIN_MODE_PBL,
1787 					   QED_CHAIN_CNT_TYPE_U32,
1788 					   n_sq_elems,
1789 					   QEDR_SQE_ELEMENT_SIZE,
1790 					   &qp->sq.pbl, &ext_pbl);
1791 
1792 	if (rc)
1793 		goto err;
1794 
1795 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1796 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1797 
1798 	rc = dev->ops->common->chain_alloc(dev->cdev,
1799 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1800 					   QED_CHAIN_MODE_PBL,
1801 					   QED_CHAIN_CNT_TYPE_U32,
1802 					   n_rq_elems,
1803 					   QEDR_RQE_ELEMENT_SIZE,
1804 					   &qp->rq.pbl, &ext_pbl);
1805 
1806 	if (rc)
1807 		goto err;
1808 
1809 	qp->qp_id = out_params.qp_id;
1810 	qp->icid = out_params.icid;
1811 
1812 	qedr_set_iwarp_db_info(dev, qp);
1813 	return rc;
1814 
1815 err:
1816 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1817 
1818 	return rc;
1819 }
1820 
1821 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1822 {
1823 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1824 	kfree(qp->wqe_wr_id);
1825 
1826 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1827 	kfree(qp->rqe_wr_id);
1828 }
1829 
1830 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1831 				 struct qedr_qp *qp,
1832 				 struct ib_pd *ibpd,
1833 				 struct ib_qp_init_attr *attrs)
1834 {
1835 	struct qed_rdma_create_qp_in_params in_params;
1836 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1837 	int rc = -EINVAL;
1838 	u32 n_rq_elems;
1839 	u32 n_sq_elems;
1840 	u32 n_sq_entries;
1841 
1842 	memset(&in_params, 0, sizeof(in_params));
1843 
1844 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1845 	 * the ring. The ring should allow at least a single WR, even if the
1846 	 * user requested none, due to allocation issues.
1847 	 * We should add an extra WR since the prod and cons indices of
1848 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1849 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1850 	 * double the number of entries due to an iSER issue that pushes far
1851 	 * more WRs than indicated. If we decline its ib_post_send() then we
1852 	 * get error prints in dmesg that we'd like to avoid.
1853 	 */
1854 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1855 			      dev->attr.max_sqe);
1856 
1857 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1858 				GFP_KERNEL);
1859 	if (!qp->wqe_wr_id) {
1860 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1861 		return -ENOMEM;
1862 	}
1863 
1864 	/* QP handle to be written in CQE */
1865 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1866 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1867 
1868 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1869 	 * the ring. The ring should allow at least a single WR, even if the
1870 	 * user requested none, due to allocation issues.
1871 	 */
1872 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1873 
1874 	/* Allocate driver internal RQ array */
1875 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1876 				GFP_KERNEL);
1877 	if (!qp->rqe_wr_id) {
1878 		DP_ERR(dev,
1879 		       "create qp: failed RQ shadow memory allocation\n");
1880 		kfree(qp->wqe_wr_id);
1881 		return -ENOMEM;
1882 	}
1883 
1884 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1885 
1886 	n_sq_entries = attrs->cap.max_send_wr;
1887 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1888 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1889 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1890 
1891 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1892 
1893 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1894 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1895 						 n_sq_elems, n_rq_elems);
1896 	else
1897 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1898 						n_sq_elems, n_rq_elems);
1899 	if (rc)
1900 		qedr_cleanup_kernel(dev, qp);
1901 
1902 	return rc;
1903 }
1904 
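/* ib_create_qp entry point: validates the requested attributes, allocates
 * the qedr_qp and dispatches to the GSI, user-space or kernel creation
 * path. iWARP QPs are additionally tracked in dev->qps for later lookup.
 */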
1905 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1906 			     struct ib_qp_init_attr *attrs,
1907 			     struct ib_udata *udata)
1908 {
1909 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1910 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1911 	struct qedr_qp *qp;
1912 	struct ib_qp *ibqp;
1913 	int rc = 0;
1914 
1915 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1916 		 udata ? "user library" : "kernel", pd);
1917 
1918 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
1919 	if (rc)
1920 		return ERR_PTR(rc);
1921 
1922 	DP_DEBUG(dev, QEDR_MSG_QP,
1923 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1924 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1925 		 get_qedr_cq(attrs->send_cq),
1926 		 get_qedr_cq(attrs->send_cq)->icid,
1927 		 get_qedr_cq(attrs->recv_cq),
1928 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
1929 
1930 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1931 	if (!qp) {
1932 		DP_ERR(dev, "create qp: failed allocating memory\n");
1933 		return ERR_PTR(-ENOMEM);
1934 	}
1935 
1936 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1937 
1938 	if (attrs->qp_type == IB_QPT_GSI) {
1939 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1940 		if (IS_ERR(ibqp))
1941 			kfree(qp);
1942 		return ibqp;
1943 	}
1944 
1945 	if (udata)
1946 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1947 	else
1948 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1949 
1950 	if (rc)
1951 		goto err;
1952 
1953 	qp->ibqp.qp_num = qp->qp_id;
1954 
1955 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1956 		rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
1957 		if (rc)
1958 			goto err;
1959 	}
1960 
1961 	return &qp->ibqp;
1962 
1963 err:
1964 	kfree(qp);
1965 
1966 	return ERR_PTR(-EFAULT);
1967 }
1968 
1969 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1970 {
1971 	switch (qp_state) {
1972 	case QED_ROCE_QP_STATE_RESET:
1973 		return IB_QPS_RESET;
1974 	case QED_ROCE_QP_STATE_INIT:
1975 		return IB_QPS_INIT;
1976 	case QED_ROCE_QP_STATE_RTR:
1977 		return IB_QPS_RTR;
1978 	case QED_ROCE_QP_STATE_RTS:
1979 		return IB_QPS_RTS;
1980 	case QED_ROCE_QP_STATE_SQD:
1981 		return IB_QPS_SQD;
1982 	case QED_ROCE_QP_STATE_ERR:
1983 		return IB_QPS_ERR;
1984 	case QED_ROCE_QP_STATE_SQE:
1985 		return IB_QPS_SQE;
1986 	}
1987 	return IB_QPS_ERR;
1988 }
1989 
1990 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1991 					enum ib_qp_state qp_state)
1992 {
1993 	switch (qp_state) {
1994 	case IB_QPS_RESET:
1995 		return QED_ROCE_QP_STATE_RESET;
1996 	case IB_QPS_INIT:
1997 		return QED_ROCE_QP_STATE_INIT;
1998 	case IB_QPS_RTR:
1999 		return QED_ROCE_QP_STATE_RTR;
2000 	case IB_QPS_RTS:
2001 		return QED_ROCE_QP_STATE_RTS;
2002 	case IB_QPS_SQD:
2003 		return QED_ROCE_QP_STATE_SQD;
2004 	case IB_QPS_ERR:
2005 		return QED_ROCE_QP_STATE_ERR;
2006 	default:
2007 		return QED_ROCE_QP_STATE_ERR;
2008 	}
2009 }
2010 
2011 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2012 {
2013 	qed_chain_reset(&qph->pbl);
2014 	qph->prod = 0;
2015 	qph->cons = 0;
2016 	qph->wqe_cons = 0;
2017 	qph->db_data.data.value = cpu_to_le16(0);
2018 }
2019 
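/* Validate a qed QP state transition and apply its driver-side effects:
 * RESET->INIT resets the SQ/RQ ring bookkeeping, and INIT->RTR rings the
 * RQ doorbell (RoCE only) in case receives were posted before the move
 * to RTR.
 */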
2020 static int qedr_update_qp_state(struct qedr_dev *dev,
2021 				struct qedr_qp *qp,
2022 				enum qed_roce_qp_state cur_state,
2023 				enum qed_roce_qp_state new_state)
2024 {
2025 	int status = 0;
2026 
2027 	if (new_state == cur_state)
2028 		return 0;
2029 
2030 	switch (cur_state) {
2031 	case QED_ROCE_QP_STATE_RESET:
2032 		switch (new_state) {
2033 		case QED_ROCE_QP_STATE_INIT:
2034 			qp->prev_wqe_size = 0;
2035 			qedr_reset_qp_hwq_info(&qp->sq);
2036 			qedr_reset_qp_hwq_info(&qp->rq);
2037 			break;
2038 		default:
2039 			status = -EINVAL;
2040 			break;
2041 		}
2042 		break;
2043 	case QED_ROCE_QP_STATE_INIT:
2044 		switch (new_state) {
2045 		case QED_ROCE_QP_STATE_RTR:
2046 			/* Update doorbell (in case post_recv was
2047 			 * done before move to RTR)
2048 			 */
2049 
2050 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2051 				writel(qp->rq.db_data.raw, qp->rq.db);
2052 			}
2053 			break;
2054 		case QED_ROCE_QP_STATE_ERR:
2055 			break;
2056 		default:
2057 			/* Invalid state change. */
2058 			status = -EINVAL;
2059 			break;
2060 		}
2061 		break;
2062 	case QED_ROCE_QP_STATE_RTR:
2063 		/* RTR->XXX */
2064 		switch (new_state) {
2065 		case QED_ROCE_QP_STATE_RTS:
2066 			break;
2067 		case QED_ROCE_QP_STATE_ERR:
2068 			break;
2069 		default:
2070 			/* Invalid state change. */
2071 			status = -EINVAL;
2072 			break;
2073 		}
2074 		break;
2075 	case QED_ROCE_QP_STATE_RTS:
2076 		/* RTS->XXX */
2077 		switch (new_state) {
2078 		case QED_ROCE_QP_STATE_SQD:
2079 			break;
2080 		case QED_ROCE_QP_STATE_ERR:
2081 			break;
2082 		default:
2083 			/* Invalid state change. */
2084 			status = -EINVAL;
2085 			break;
2086 		}
2087 		break;
2088 	case QED_ROCE_QP_STATE_SQD:
2089 		/* SQD->XXX */
2090 		switch (new_state) {
2091 		case QED_ROCE_QP_STATE_RTS:
2092 		case QED_ROCE_QP_STATE_ERR:
2093 			break;
2094 		default:
2095 			/* Invalid state change. */
2096 			status = -EINVAL;
2097 			break;
2098 		}
2099 		break;
2100 	case QED_ROCE_QP_STATE_ERR:
2101 		/* ERR->XXX */
2102 		switch (new_state) {
2103 		case QED_ROCE_QP_STATE_RESET:
2104 			if ((qp->rq.prod != qp->rq.cons) ||
2105 			    (qp->sq.prod != qp->sq.cons)) {
2106 				DP_NOTICE(dev,
2107 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2108 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2109 					  qp->sq.cons);
2110 				status = -EINVAL;
2111 			}
2112 			break;
2113 		default:
2114 			status = -EINVAL;
2115 			break;
2116 		}
2117 		break;
2118 	default:
2119 		status = -EINVAL;
2120 		break;
2121 	}
2122 
2123 	return status;
2124 }
2125 
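/* Translate the ib_qp_attr mask into qed_rdma_modify_qp_in_params, issue
 * the modify operation to the qed layer and keep the driver's shadow QP
 * state in sync.
 */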
2126 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2127 		   int attr_mask, struct ib_udata *udata)
2128 {
2129 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2130 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2131 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2132 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2133 	enum ib_qp_state old_qp_state, new_qp_state;
2134 	enum qed_roce_qp_state cur_state;
2135 	int rc = 0;
2136 
2137 	DP_DEBUG(dev, QEDR_MSG_QP,
2138 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2139 		 attr->qp_state);
2140 
2141 	old_qp_state = qedr_get_ibqp_state(qp->state);
2142 	if (attr_mask & IB_QP_STATE)
2143 		new_qp_state = attr->qp_state;
2144 	else
2145 		new_qp_state = old_qp_state;
2146 
2147 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2148 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2149 					ibqp->qp_type, attr_mask)) {
2150 			DP_ERR(dev,
2151 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2152 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2153 			       attr_mask, qp->qp_id, ibqp->qp_type,
2154 			       old_qp_state, new_qp_state);
2155 			rc = -EINVAL;
2156 			goto err;
2157 		}
2158 	}
2159 
2160 	/* Translate the masks... */
2161 	if (attr_mask & IB_QP_STATE) {
2162 		SET_FIELD(qp_params.modify_flags,
2163 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2164 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2165 	}
2166 
2167 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2168 		qp_params.sqd_async = true;
2169 
2170 	if (attr_mask & IB_QP_PKEY_INDEX) {
2171 		SET_FIELD(qp_params.modify_flags,
2172 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2173 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2174 			rc = -EINVAL;
2175 			goto err;
2176 		}
2177 
2178 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2179 	}
2180 
2181 	if (attr_mask & IB_QP_QKEY)
2182 		qp->qkey = attr->qkey;
2183 
2184 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2185 		SET_FIELD(qp_params.modify_flags,
2186 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2187 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2188 						  IB_ACCESS_REMOTE_READ;
2189 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2190 						   IB_ACCESS_REMOTE_WRITE;
2191 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2192 					       IB_ACCESS_REMOTE_ATOMIC;
2193 	}
2194 
2195 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2196 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2197 			return -EINVAL;
2198 
2199 		if (attr_mask & IB_QP_PATH_MTU) {
2200 			if (attr->path_mtu < IB_MTU_256 ||
2201 			    attr->path_mtu > IB_MTU_4096) {
2202 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2203 				rc = -EINVAL;
2204 				goto err;
2205 			}
2206 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2207 				      ib_mtu_enum_to_int(iboe_get_mtu
2208 							 (dev->ndev->mtu)));
2209 		}
2210 
2211 		if (!qp->mtu) {
2212 			qp->mtu =
2213 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2214 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2215 		}
2216 
2217 		SET_FIELD(qp_params.modify_flags,
2218 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2219 
2220 		qp_params.traffic_class_tos = grh->traffic_class;
2221 		qp_params.flow_label = grh->flow_label;
2222 		qp_params.hop_limit_ttl = grh->hop_limit;
2223 
2224 		qp->sgid_idx = grh->sgid_index;
2225 
2226 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2227 		if (rc) {
2228 			DP_ERR(dev,
2229 			       "modify qp: problems with GID index %d (rc=%d)\n",
2230 			       grh->sgid_index, rc);
2231 			return rc;
2232 		}
2233 
2234 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2235 				   qp_params.remote_mac_addr);
2236 		if (rc)
2237 			return rc;
2238 
2239 		qp_params.use_local_mac = true;
2240 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2241 
2242 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2243 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2244 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2245 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2246 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2247 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2248 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2249 			 qp_params.remote_mac_addr);
2250 
2251 		qp_params.mtu = qp->mtu;
2252 		qp_params.lb_indication = false;
2253 	}
2254 
2255 	if (!qp_params.mtu) {
2256 		/* Stay with current MTU */
2257 		if (qp->mtu)
2258 			qp_params.mtu = qp->mtu;
2259 		else
2260 			qp_params.mtu =
2261 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2262 	}
2263 
2264 	if (attr_mask & IB_QP_TIMEOUT) {
2265 		SET_FIELD(qp_params.modify_flags,
2266 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2267 
2268 		/* The received timeout value is an exponent used like this:
2269 		 *    "12.7.34 LOCAL ACK TIMEOUT
2270 		 *    Value representing the transport (ACK) timeout for use by
2271 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2272 		 * The FW expects timeout in msec so we need to divide the usec
2273 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2274 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2275 		 * The value of zero means infinite so we use a 'max_t' to make
2276 		 * sure that sub 1 msec values will be configured as 1 msec.
2277 		 */
2278 		if (attr->timeout)
2279 			qp_params.ack_timeout =
2280 					1 << max_t(int, attr->timeout - 8, 0);
2281 		else
2282 			qp_params.ack_timeout = 0;
2283 	}
2284 
2285 	if (attr_mask & IB_QP_RETRY_CNT) {
2286 		SET_FIELD(qp_params.modify_flags,
2287 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2288 		qp_params.retry_cnt = attr->retry_cnt;
2289 	}
2290 
2291 	if (attr_mask & IB_QP_RNR_RETRY) {
2292 		SET_FIELD(qp_params.modify_flags,
2293 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2294 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2295 	}
2296 
2297 	if (attr_mask & IB_QP_RQ_PSN) {
2298 		SET_FIELD(qp_params.modify_flags,
2299 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2300 		qp_params.rq_psn = attr->rq_psn;
2301 		qp->rq_psn = attr->rq_psn;
2302 	}
2303 
2304 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2305 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2306 			rc = -EINVAL;
2307 			DP_ERR(dev,
2308 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2309 			       attr->max_rd_atomic,
2310 			       dev->attr.max_qp_req_rd_atomic_resc);
2311 			goto err;
2312 		}
2313 
2314 		SET_FIELD(qp_params.modify_flags,
2315 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2316 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2317 	}
2318 
2319 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2320 		SET_FIELD(qp_params.modify_flags,
2321 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2322 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2323 	}
2324 
2325 	if (attr_mask & IB_QP_SQ_PSN) {
2326 		SET_FIELD(qp_params.modify_flags,
2327 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2328 		qp_params.sq_psn = attr->sq_psn;
2329 		qp->sq_psn = attr->sq_psn;
2330 	}
2331 
2332 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2333 		if (attr->max_dest_rd_atomic >
2334 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2335 			DP_ERR(dev,
2336 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2337 			       attr->max_dest_rd_atomic,
2338 			       dev->attr.max_qp_resp_rd_atomic_resc);
2339 
2340 			rc = -EINVAL;
2341 			goto err;
2342 		}
2343 
2344 		SET_FIELD(qp_params.modify_flags,
2345 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2346 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2347 	}
2348 
2349 	if (attr_mask & IB_QP_DEST_QPN) {
2350 		SET_FIELD(qp_params.modify_flags,
2351 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2352 
2353 		qp_params.dest_qp = attr->dest_qp_num;
2354 		qp->dest_qp_num = attr->dest_qp_num;
2355 	}
2356 
2357 	cur_state = qp->state;
2358 
2359 	/* Update the QP state before the actual ramrod to prevent a race with
2360 	 * fast path. Modifying the QP state to error will cause the device to
2361 	 * flush the CQEs, and polling those flushed CQEs while the QP is not
2362 	 * yet in the error state would be treated as a potential issue.
2363 	 */
2364 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2365 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2366 		qp->state = QED_ROCE_QP_STATE_ERR;
2367 
2368 	if (qp->qp_type != IB_QPT_GSI)
2369 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2370 					      qp->qed_qp, &qp_params);
2371 
2372 	if (attr_mask & IB_QP_STATE) {
2373 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2374 			rc = qedr_update_qp_state(dev, qp, cur_state,
2375 						  qp_params.new_state);
2376 		qp->state = qp_params.new_state;
2377 	}
2378 
2379 err:
2380 	return rc;
2381 }
2382 
2383 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2384 {
2385 	int ib_qp_acc_flags = 0;
2386 
2387 	if (params->incoming_rdma_write_en)
2388 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2389 	if (params->incoming_rdma_read_en)
2390 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2391 	if (params->incoming_atomic_en)
2392 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2393 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2394 	return ib_qp_acc_flags;
2395 }
2396 
2397 int qedr_query_qp(struct ib_qp *ibqp,
2398 		  struct ib_qp_attr *qp_attr,
2399 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2400 {
2401 	struct qed_rdma_query_qp_out_params params;
2402 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2403 	struct qedr_dev *dev = qp->dev;
2404 	int rc = 0;
2405 
2406 	memset(&params, 0, sizeof(params));
2407 
2408 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2409 	if (rc)
2410 		goto err;
2411 
2412 	memset(qp_attr, 0, sizeof(*qp_attr));
2413 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2414 
2415 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2416 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2417 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2418 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2419 	qp_attr->rq_psn = params.rq_psn;
2420 	qp_attr->sq_psn = params.sq_psn;
2421 	qp_attr->dest_qp_num = params.dest_qp;
2422 
2423 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2424 
2425 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2426 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2427 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2428 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2429 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2430 	qp_init_attr->cap = qp_attr->cap;
2431 
2432 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2433 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2434 			params.flow_label, qp->sgid_idx,
2435 			params.hop_limit_ttl, params.traffic_class_tos);
2436 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2437 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2438 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2439 	qp_attr->timeout = params.timeout;
2440 	qp_attr->rnr_retry = params.rnr_retry;
2441 	qp_attr->retry_cnt = params.retry_cnt;
2442 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2443 	qp_attr->pkey_index = params.pkey_index;
2444 	qp_attr->port_num = 1;
2445 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2446 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2447 	qp_attr->alt_pkey_index = 0;
2448 	qp_attr->alt_port_num = 0;
2449 	qp_attr->alt_timeout = 0;
2450 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2451 
2452 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2453 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2454 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2455 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2456 
2457 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2458 		 qp_attr->cap.max_inline_data);
2459 
2460 err:
2461 	return rc;
2462 }
2463 
2464 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2465 				  struct ib_udata *udata)
2466 {
2467 	int rc = 0;
2468 
2469 	if (qp->qp_type != IB_QPT_GSI) {
2470 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2471 		if (rc)
2472 			return rc;
2473 	}
2474 
2475 	if (udata)
2476 		qedr_cleanup_user(dev, qp);
2477 	else
2478 		qedr_cleanup_kernel(dev, qp);
2479 
2480 	return 0;
2481 }
2482 
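/* QP teardown: RoCE QPs that are still operational are first moved to the
 * ERROR state so outstanding work is flushed; iWARP QPs wait for a pending
 * connect/accept to finish before their resources are released.
 */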
2483 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2484 {
2485 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2486 	struct qedr_dev *dev = qp->dev;
2487 	struct ib_qp_attr attr;
2488 	int attr_mask = 0;
2489 	int rc = 0;
2490 
2491 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2492 		 qp, qp->qp_type);
2493 
2494 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2495 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2496 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2497 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2498 
2499 			attr.qp_state = IB_QPS_ERR;
2500 			attr_mask |= IB_QP_STATE;
2501 
2502 			/* Change the QP state to ERROR */
2503 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2504 		}
2505 	} else {
2506 		/* Wait for the connect/accept to complete */
2507 		if (qp->ep) {
2508 			int wait_count = 1;
2509 
2510 			while (qp->ep->during_connect) {
2511 				DP_DEBUG(dev, QEDR_MSG_QP,
2512 					 "Still in during connect/accept\n");
2513 
2514 				msleep(100);
2515 				if (wait_count++ > 200) {
2516 					DP_NOTICE(dev,
2517 						  "during connect timeout\n");
2518 					break;
2519 				}
2520 			}
2521 		}
2522 	}
2523 
2524 	if (qp->qp_type == IB_QPT_GSI)
2525 		qedr_destroy_gsi_qp(dev);
2526 
2527 	qedr_free_qp_resources(dev, qp, udata);
2528 
2529 	if (atomic_dec_and_test(&qp->refcnt) &&
2530 	    rdma_protocol_iwarp(&dev->ibdev, 1)) {
2531 		xa_erase_irq(&dev->qps, qp->qp_id);
2532 		kfree(qp);
2533 	}
2534 	return rc;
2535 }
2536 
2537 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
2538 		   struct ib_udata *udata)
2539 {
2540 	struct qedr_ah *ah = get_qedr_ah(ibah);
2541 
2542 	rdma_copy_ah_attr(&ah->attr, attr);
2543 
2544 	return 0;
2545 }
2546 
2547 void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2548 {
2549 	struct qedr_ah *ah = get_qedr_ah(ibah);
2550 
2551 	rdma_destroy_ah_attr(&ah->attr);
2552 }
2553 
2554 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2555 {
2556 	struct qedr_pbl *pbl, *tmp;
2557 
2558 	if (info->pbl_table)
2559 		list_add_tail(&info->pbl_table->list_entry,
2560 			      &info->free_pbl_list);
2561 
2562 	if (!list_empty(&info->inuse_pbl_list))
2563 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2564 
2565 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2566 		list_del(&pbl->list_entry);
2567 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2568 	}
2569 }
2570 
2571 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2572 			size_t page_list_len, bool two_layered)
2573 {
2574 	struct qedr_pbl *tmp;
2575 	int rc;
2576 
2577 	INIT_LIST_HEAD(&info->free_pbl_list);
2578 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2579 
2580 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2581 				  page_list_len, two_layered);
2582 	if (rc)
2583 		goto done;
2584 
2585 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2586 	if (IS_ERR(info->pbl_table)) {
2587 		rc = PTR_ERR(info->pbl_table);
2588 		goto done;
2589 	}
2590 
2591 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2592 		 &info->pbl_table->pa);
2593 
2594 	/* In the usual case we use 2 PBLs, so we add one to the free
2595 	 * list and allocate another one.
2596 	 */
2597 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2598 	if (IS_ERR(tmp)) {
2599 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2600 		goto done;
2601 	}
2602 
2603 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2604 
2605 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2606 
2607 done:
2608 	if (rc)
2609 		free_mr_info(dev, info);
2610 
2611 	return rc;
2612 }
2613 
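/* Register a user MR: pin the user buffer through ib_umem_get(), build the
 * two-level PBL describing its pages and register the TID with the
 * firmware.
 */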
2614 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2615 			       u64 usr_addr, int acc, struct ib_udata *udata)
2616 {
2617 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2618 	struct qedr_mr *mr;
2619 	struct qedr_pd *pd;
2620 	int rc = -ENOMEM;
2621 
2622 	pd = get_qedr_pd(ibpd);
2623 	DP_DEBUG(dev, QEDR_MSG_MR,
2624 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2625 		 pd->pd_id, start, len, usr_addr, acc);
2626 
2627 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2628 		return ERR_PTR(-EINVAL);
2629 
2630 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2631 	if (!mr)
2632 		return ERR_PTR(rc);
2633 
2634 	mr->type = QEDR_MR_USER;
2635 
2636 	mr->umem = ib_umem_get(udata, start, len, acc, 0);
2637 	if (IS_ERR(mr->umem)) {
2638 		rc = -EFAULT;
2639 		goto err0;
2640 	}
2641 
2642 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2643 	if (rc)
2644 		goto err1;
2645 
2646 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2647 			   &mr->info.pbl_info, PAGE_SHIFT);
2648 
2649 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2650 	if (rc) {
2651 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2652 		goto err1;
2653 	}
2654 
2655 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2656 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2657 	mr->hw_mr.key = 0;
2658 	mr->hw_mr.pd = pd->pd_id;
2659 	mr->hw_mr.local_read = 1;
2660 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2661 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2662 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2663 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2664 	mr->hw_mr.mw_bind = false;
2665 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2666 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2667 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2668 	mr->hw_mr.page_size_log = PAGE_SHIFT;
2669 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2670 	mr->hw_mr.length = len;
2671 	mr->hw_mr.vaddr = usr_addr;
2672 	mr->hw_mr.zbva = false;
2673 	mr->hw_mr.phy_mr = false;
2674 	mr->hw_mr.dma_mr = false;
2675 
2676 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2677 	if (rc) {
2678 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2679 		goto err2;
2680 	}
2681 
2682 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2683 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2684 	    mr->hw_mr.remote_atomic)
2685 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2686 
2687 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2688 		 mr->ibmr.lkey);
2689 	return &mr->ibmr;
2690 
2691 err2:
2692 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2693 err1:
2694 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2695 err0:
2696 	kfree(mr);
2697 	return ERR_PTR(rc);
2698 }
2699 
2700 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2701 {
2702 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2703 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2704 	int rc = 0;
2705 
2706 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2707 	if (rc)
2708 		return rc;
2709 
2710 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2711 
2712 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2713 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2714 
2715 	/* It could be user-registered memory. */
2716 	if (mr->umem)
2717 		ib_umem_release(mr->umem);
2718 
2719 	kfree(mr);
2720 
2721 	return rc;
2722 }
2723 
2724 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2725 				       int max_page_list_len)
2726 {
2727 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2728 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2729 	struct qedr_mr *mr;
2730 	int rc = -ENOMEM;
2731 
2732 	DP_DEBUG(dev, QEDR_MSG_MR,
2733 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2734 		 max_page_list_len);
2735 
2736 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2737 	if (!mr)
2738 		return ERR_PTR(rc);
2739 
2740 	mr->dev = dev;
2741 	mr->type = QEDR_MR_FRMR;
2742 
2743 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2744 	if (rc)
2745 		goto err0;
2746 
2747 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2748 	if (rc) {
2749 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2750 		goto err0;
2751 	}
2752 
2753 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2754 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2755 	mr->hw_mr.key = 0;
2756 	mr->hw_mr.pd = pd->pd_id;
2757 	mr->hw_mr.local_read = 1;
2758 	mr->hw_mr.local_write = 0;
2759 	mr->hw_mr.remote_read = 0;
2760 	mr->hw_mr.remote_write = 0;
2761 	mr->hw_mr.remote_atomic = 0;
2762 	mr->hw_mr.mw_bind = false;
2763 	mr->hw_mr.pbl_ptr = 0;
2764 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2765 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2766 	mr->hw_mr.fbo = 0;
2767 	mr->hw_mr.length = 0;
2768 	mr->hw_mr.vaddr = 0;
2769 	mr->hw_mr.zbva = false;
2770 	mr->hw_mr.phy_mr = true;
2771 	mr->hw_mr.dma_mr = false;
2772 
2773 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2774 	if (rc) {
2775 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2776 		goto err1;
2777 	}
2778 
2779 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2780 	mr->ibmr.rkey = mr->ibmr.lkey;
2781 
2782 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2783 	return mr;
2784 
2785 err1:
2786 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2787 err0:
2788 	kfree(mr);
2789 	return ERR_PTR(rc);
2790 }
2791 
2792 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
2793 			    u32 max_num_sg, struct ib_udata *udata)
2794 {
2795 	struct qedr_mr *mr;
2796 
2797 	if (mr_type != IB_MR_TYPE_MEM_REG)
2798 		return ERR_PTR(-EINVAL);
2799 
2800 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2801 
2802 	if (IS_ERR(mr))
2803 		return ERR_PTR(-EINVAL);
2804 
2805 	return &mr->ibmr;
2806 }
2807 
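/* ib_sg_to_pages() callback: write one page address into the next free PBE
 * slot of the MR's PBL table.
 */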
2808 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2809 {
2810 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2811 	struct qedr_pbl *pbl_table;
2812 	struct regpair *pbe;
2813 	u32 pbes_in_page;
2814 
2815 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2816 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2817 		return -ENOMEM;
2818 	}
2819 
2820 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2821 		 mr->npages, addr);
2822 
2823 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2824 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2825 	pbe = (struct regpair *)pbl_table->va;
2826 	pbe +=  mr->npages % pbes_in_page;
2827 	pbe->lo = cpu_to_le32((u32)addr);
2828 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2829 
2830 	mr->npages++;
2831 
2832 	return 0;
2833 }
2834 
2835 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2836 {
2837 	int work = info->completed - info->completed_handled - 1;
2838 
2839 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2840 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2841 		struct qedr_pbl *pbl;
2842 
2843 		/* Free all the page lists that can be freed (all the ones that
2844 		 * were invalidated), under the assumption that if an FMR
2845 		 * completed successfully, then any invalidate operation issued
2846 		 * before it has also completed.
2847 		 */
2848 		pbl = list_first_entry(&info->inuse_pbl_list,
2849 				       struct qedr_pbl, list_entry);
2850 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2851 		info->completed_handled++;
2852 	}
2853 }
2854 
2855 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2856 		   int sg_nents, unsigned int *sg_offset)
2857 {
2858 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2859 
2860 	mr->npages = 0;
2861 
2862 	handle_completed_mrs(mr->dev, &mr->info);
2863 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2864 }
2865 
2866 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2867 {
2868 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2869 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2870 	struct qedr_mr *mr;
2871 	int rc;
2872 
2873 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2874 	if (!mr)
2875 		return ERR_PTR(-ENOMEM);
2876 
2877 	mr->type = QEDR_MR_DMA;
2878 
2879 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2880 	if (rc) {
2881 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2882 		goto err1;
2883 	}
2884 
2885 	/* index only, 18 bit long, lkey = itid << 8 | key */
2886 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2887 	mr->hw_mr.pd = pd->pd_id;
2888 	mr->hw_mr.local_read = 1;
2889 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2890 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2891 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2892 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2893 	mr->hw_mr.dma_mr = true;
2894 
2895 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2896 	if (rc) {
2897 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2898 		goto err2;
2899 	}
2900 
2901 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2902 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2903 	    mr->hw_mr.remote_atomic)
2904 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2905 
2906 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2907 	return &mr->ibmr;
2908 
2909 err2:
2910 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2911 err1:
2912 	kfree(mr);
2913 	return ERR_PTR(rc);
2914 }
2915 
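/* The WQ is treated as full when the producer is one slot behind the
 * consumer; one entry is deliberately left unused so that full and empty
 * can be distinguished.
 */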
2916 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2917 {
2918 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2919 }
2920 
2921 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2922 {
2923 	int i, len = 0;
2924 
2925 	for (i = 0; i < num_sge; i++)
2926 		len += sg_list[i].length;
2927 
2928 	return len;
2929 }
2930 
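/* Unconditionally byte-swap every 64-bit word of a WQE segment; used by
 * qedr_prepare_sq_inline_data() below after the inline payload has been
 * memcpy'd into the ring as a raw byte stream.
 */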
2931 static void swap_wqe_data64(u64 *p)
2932 {
2933 	int i;
2934 
2935 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2936 		*p = cpu_to_be64(cpu_to_le64(*p));
2937 }
2938 
2939 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2940 				       struct qedr_qp *qp, u8 *wqe_size,
2941 				       const struct ib_send_wr *wr,
2942 				       const struct ib_send_wr **bad_wr,
2943 				       u8 *bits, u8 bit)
2944 {
2945 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2946 	char *seg_prt, *wqe;
2947 	int i, seg_siz;
2948 
2949 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2950 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2951 		*bad_wr = wr;
2952 		return 0;
2953 	}
2954 
2955 	if (!data_size)
2956 		return data_size;
2957 
2958 	*bits |= bit;
2959 
2960 	seg_prt = NULL;
2961 	wqe = NULL;
2962 	seg_siz = 0;
2963 
2964 	/* Copy data inline */
2965 	for (i = 0; i < wr->num_sge; i++) {
2966 		u32 len = wr->sg_list[i].length;
2967 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2968 
2969 		while (len > 0) {
2970 			u32 cur;
2971 
2972 			/* New segment required */
2973 			if (!seg_siz) {
2974 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2975 				seg_prt = wqe;
2976 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2977 				(*wqe_size)++;
2978 			}
2979 
2980 			/* Calculate currently allowed length */
2981 			cur = min_t(u32, len, seg_siz);
2982 			memcpy(seg_prt, src, cur);
2983 
2984 			/* Update segment variables */
2985 			seg_prt += cur;
2986 			seg_siz -= cur;
2987 
2988 			/* Update sge variables */
2989 			src += cur;
2990 			len -= cur;
2991 
2992 			/* Swap fully-completed segments */
2993 			if (!seg_siz)
2994 				swap_wqe_data64((u64 *)wqe);
2995 		}
2996 	}
2997 
2998 	/* swap last not completed segment */
2999 	if (seg_siz)
3000 		swap_wqe_data64((u64 *)wqe);
3001 
3002 	return data_size;
3003 }
3004 
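/* Helpers for filling RQ/SRQ SGEs and the SRQ WQE header; all fields are
 * written in little-endian, as consumed by the hardware.
 */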
3005 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3006 	do {							\
3007 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3008 		(sge)->length = cpu_to_le32(vlength);		\
3009 		(sge)->flags = cpu_to_le32(vflags);		\
3010 	} while (0)
3011 
3012 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3013 	do {							\
3014 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3015 		(hdr)->num_sges = num_sge;			\
3016 	} while (0)
3017 
3018 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3019 	do {							\
3020 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3021 		(sge)->length = cpu_to_le32(vlength);		\
3022 		(sge)->l_key = cpu_to_le32(vlkey);		\
3023 	} while (0)
3024 
3025 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3026 				const struct ib_send_wr *wr)
3027 {
3028 	u32 data_size = 0;
3029 	int i;
3030 
3031 	for (i = 0; i < wr->num_sge; i++) {
3032 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3033 
3034 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3035 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3036 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3037 		data_size += wr->sg_list[i].length;
3038 	}
3039 
3040 	if (wqe_size)
3041 		*wqe_size += wr->num_sge;
3042 
3043 	return data_size;
3044 }
3045 
3046 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3047 				     struct qedr_qp *qp,
3048 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3049 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3050 				     const struct ib_send_wr *wr,
3051 				     const struct ib_send_wr **bad_wr)
3052 {
3053 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3054 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3055 
3056 	if (wr->send_flags & IB_SEND_INLINE &&
3057 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3058 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3059 		u8 flags = 0;
3060 
3061 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3062 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3063 						   bad_wr, &rwqe->flags, flags);
3064 	}
3065 
3066 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3067 }
3068 
3069 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3070 				     struct qedr_qp *qp,
3071 				     struct rdma_sq_send_wqe_1st *swqe,
3072 				     struct rdma_sq_send_wqe_2st *swqe2,
3073 				     const struct ib_send_wr *wr,
3074 				     const struct ib_send_wr **bad_wr)
3075 {
3076 	memset(swqe2, 0, sizeof(*swqe2));
3077 	if (wr->send_flags & IB_SEND_INLINE) {
3078 		u8 flags = 0;
3079 
3080 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3081 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3082 						   bad_wr, &swqe->flags, flags);
3083 	}
3084 
3085 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3086 }
3087 
3088 static int qedr_prepare_reg(struct qedr_qp *qp,
3089 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3090 			    const struct ib_reg_wr *wr)
3091 {
3092 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3093 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3094 
3095 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3096 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3097 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3098 	fwqe1->l_key = wr->key;
3099 
3100 	fwqe2->access_ctrl = 0;
3101 
3102 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3103 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3104 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3105 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3106 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3107 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3108 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3109 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3110 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3111 	fwqe2->fmr_ctrl = 0;
3112 
3113 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3114 		   ilog2(mr->ibmr.page_size) - 12);
3115 
3116 	fwqe2->length_hi = 0;
3117 	fwqe2->length_lo = mr->ibmr.length;
3118 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3119 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3120 
3121 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3122 
3123 	return 0;
3124 }
3125 
3126 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3127 {
3128 	switch (opcode) {
3129 	case IB_WR_RDMA_WRITE:
3130 	case IB_WR_RDMA_WRITE_WITH_IMM:
3131 		return IB_WC_RDMA_WRITE;
3132 	case IB_WR_SEND_WITH_IMM:
3133 	case IB_WR_SEND:
3134 	case IB_WR_SEND_WITH_INV:
3135 		return IB_WC_SEND;
3136 	case IB_WR_RDMA_READ:
3137 	case IB_WR_RDMA_READ_WITH_INV:
3138 		return IB_WC_RDMA_READ;
3139 	case IB_WR_ATOMIC_CMP_AND_SWP:
3140 		return IB_WC_COMP_SWAP;
3141 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3142 		return IB_WC_FETCH_ADD;
3143 	case IB_WR_REG_MR:
3144 		return IB_WC_REG_MR;
3145 	case IB_WR_LOCAL_INV:
3146 		return IB_WC_LOCAL_INV;
3147 	default:
3148 		return IB_WC_SEND;
3149 	}
3150 }
3151 
3152 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3153 				      const struct ib_send_wr *wr)
3154 {
3155 	int wq_is_full, err_wr, pbl_is_full;
3156 	struct qedr_dev *dev = qp->dev;
3157 
3158 	/* prevent SQ overflow and/or processing of a bad WR */
3159 	err_wr = wr->num_sge > qp->sq.max_sges;
3160 	wq_is_full = qedr_wq_is_full(&qp->sq);
3161 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3162 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3163 	if (wq_is_full || err_wr || pbl_is_full) {
3164 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3165 			DP_ERR(dev,
3166 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3167 			       qp);
3168 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3169 		}
3170 
3171 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3172 			DP_ERR(dev,
3173 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3174 			       qp);
3175 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3176 		}
3177 
3178 		if (pbl_is_full &&
3179 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3180 			DP_ERR(dev,
3181 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3182 			       qp);
3183 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3184 		}
3185 		return false;
3186 	}
3187 	return true;
3188 }
3189 
3190 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3191 			    const struct ib_send_wr **bad_wr)
3192 {
3193 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3194 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3195 	struct rdma_sq_atomic_wqe_1st *awqe1;
3196 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3197 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3198 	struct rdma_sq_send_wqe_2st *swqe2;
3199 	struct rdma_sq_local_inv_wqe *iwqe;
3200 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3201 	struct rdma_sq_send_wqe_1st *swqe;
3202 	struct rdma_sq_rdma_wqe_1st *rwqe;
3203 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3204 	struct rdma_sq_common_wqe *wqe;
3205 	u32 length;
3206 	int rc = 0;
3207 	bool comp;
3208 
3209 	if (!qedr_can_post_send(qp, wr)) {
3210 		*bad_wr = wr;
3211 		return -ENOMEM;
3212 	}
3213 
3214 	wqe = qed_chain_produce(&qp->sq.pbl);
3215 	qp->wqe_wr_id[qp->sq.prod].signaled =
3216 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3217 
3218 	wqe->flags = 0;
3219 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3220 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3221 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3222 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3223 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3224 		   !!(wr->send_flags & IB_SEND_FENCE));
3225 	wqe->prev_wqe_size = qp->prev_wqe_size;
3226 
3227 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3228 
3229 	switch (wr->opcode) {
3230 	case IB_WR_SEND_WITH_IMM:
3231 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3232 			rc = -EINVAL;
3233 			*bad_wr = wr;
3234 			break;
3235 		}
3236 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3237 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3238 		swqe->wqe_size = 2;
3239 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3240 
3241 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3242 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3243 						   wr, bad_wr);
3244 		swqe->length = cpu_to_le32(length);
3245 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3246 		qp->prev_wqe_size = swqe->wqe_size;
3247 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3248 		break;
3249 	case IB_WR_SEND:
3250 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3251 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3252 
3253 		swqe->wqe_size = 2;
3254 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3255 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3256 						   wr, bad_wr);
3257 		swqe->length = cpu_to_le32(length);
3258 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3259 		qp->prev_wqe_size = swqe->wqe_size;
3260 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3261 		break;
3262 	case IB_WR_SEND_WITH_INV:
3263 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3264 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3265 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3266 		swqe->wqe_size = 2;
3267 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3268 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3269 						   wr, bad_wr);
3270 		swqe->length = cpu_to_le32(length);
3271 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3272 		qp->prev_wqe_size = swqe->wqe_size;
3273 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3274 		break;
3275 
3276 	case IB_WR_RDMA_WRITE_WITH_IMM:
3277 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3278 			rc = -EINVAL;
3279 			*bad_wr = wr;
3280 			break;
3281 		}
3282 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3283 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3284 
3285 		rwqe->wqe_size = 2;
3286 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3287 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3288 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3289 						   wr, bad_wr);
3290 		rwqe->length = cpu_to_le32(length);
3291 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3292 		qp->prev_wqe_size = rwqe->wqe_size;
3293 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3294 		break;
3295 	case IB_WR_RDMA_WRITE:
3296 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3297 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3298 
3299 		rwqe->wqe_size = 2;
3300 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3301 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3302 						   wr, bad_wr);
3303 		rwqe->length = cpu_to_le32(length);
3304 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3305 		qp->prev_wqe_size = rwqe->wqe_size;
3306 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3307 		break;
3308 	case IB_WR_RDMA_READ_WITH_INV:
3309 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3310 		/* fallthrough -- handled identically to RDMA READ */
3311 
3312 	case IB_WR_RDMA_READ:
3313 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3314 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3315 
3316 		rwqe->wqe_size = 2;
3317 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3318 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3319 						   wr, bad_wr);
3320 		rwqe->length = cpu_to_le32(length);
3321 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3322 		qp->prev_wqe_size = rwqe->wqe_size;
3323 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3324 		break;
3325 
3326 	case IB_WR_ATOMIC_CMP_AND_SWP:
3327 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3328 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3329 		awqe1->wqe_size = 4;
3330 
3331 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3332 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3333 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3334 
3335 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3336 
3337 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3338 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3339 			DMA_REGPAIR_LE(awqe3->swap_data,
3340 				       atomic_wr(wr)->compare_add);
3341 		} else {
3342 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3343 			DMA_REGPAIR_LE(awqe3->swap_data,
3344 				       atomic_wr(wr)->swap);
3345 			DMA_REGPAIR_LE(awqe3->cmp_data,
3346 				       atomic_wr(wr)->compare_add);
3347 		}
3348 
3349 		qedr_prepare_sq_sges(qp, NULL, wr);
3350 
3351 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3352 		qp->prev_wqe_size = awqe1->wqe_size;
3353 		break;
3354 
3355 	case IB_WR_LOCAL_INV:
3356 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3357 		iwqe->wqe_size = 1;
3358 
3359 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3360 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3361 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3362 		qp->prev_wqe_size = iwqe->wqe_size;
3363 		break;
3364 	case IB_WR_REG_MR:
3365 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3366 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3367 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3368 		fwqe1->wqe_size = 2;
3369 
3370 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3371 		if (rc) {
3372 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3373 			*bad_wr = wr;
3374 			break;
3375 		}
3376 
3377 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3378 		qp->prev_wqe_size = fwqe1->wqe_size;
3379 		break;
3380 	default:
3381 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3382 		rc = -EINVAL;
3383 		*bad_wr = wr;
3384 		break;
3385 	}
3386 
3387 	if (*bad_wr) {
3388 		u16 value;
3389 
3390 		/* Restore prod to its position before
3391 		 * this WR was processed
3392 		 */
3393 		value = le16_to_cpu(qp->sq.db_data.data.value);
3394 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3395 
3396 		/* Restore prev_wqe_size */
3397 		qp->prev_wqe_size = wqe->prev_wqe_size;
3398 		rc = -EINVAL;
3399 		DP_ERR(dev, "POST SEND FAILED\n");
3400 	}
3401 
3402 	return rc;
3403 }
3404 
3405 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3406 		   const struct ib_send_wr **bad_wr)
3407 {
3408 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3409 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3410 	unsigned long flags;
3411 	int rc = 0;
3412 
3413 	*bad_wr = NULL;
3414 
3415 	if (qp->qp_type == IB_QPT_GSI)
3416 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3417 
3418 	spin_lock_irqsave(&qp->q_lock, flags);
3419 
3420 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3421 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3422 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3423 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3424 			spin_unlock_irqrestore(&qp->q_lock, flags);
3425 			*bad_wr = wr;
3426 			DP_DEBUG(dev, QEDR_MSG_CQ,
3427 				 "QP in wrong state! QP icid=0x%x state %d\n",
3428 				 qp->icid, qp->state);
3429 			return -EINVAL;
3430 		}
3431 	}
3432 
3433 	while (wr) {
3434 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3435 		if (rc)
3436 			break;
3437 
3438 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3439 
3440 		qedr_inc_sw_prod(&qp->sq);
3441 
3442 		qp->sq.db_data.data.value++;
3443 
3444 		wr = wr->next;
3445 	}
3446 
3447 	/* Trigger doorbell
3448 	 * If there was a failure in the first WR then it will be triggered in
3449 	 * vain. However, this is not harmful (as long as the producer value is
3450 	 * unchanged). For performance reasons we avoid checking for this
3451 	 * redundant doorbell.
3452 	 *
3453 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3454 	 * soon as we give the doorbell, we could get a completion
3455 	 * for this wr, therefore we need to make sure that the
3456 	 * memory is updated before giving the doorbell.
3457 	 * During qedr_poll_cq, rmb is called before accessing the
3458 	 * cqe. This covers for the smp_rmb as well.
3459 	 */
3460 	smp_wmb();
3461 	writel(qp->sq.db_data.raw, qp->sq.db);
3462 
3463 	spin_unlock_irqrestore(&qp->q_lock, flags);
3464 
3465 	return rc;
3466 }
3467 
3468 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3469 {
3470 	u32 used;
3471 
3472 	/* Calculate the number of elements used from the producer and
3473 	 * consumer counts and subtract it from the maximum number of
3474 	 * work requests supported, giving the number of elements left.
3475 	 */
3476 	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3477 
3478 	return hw_srq->max_wr - used;
3479 }
3480 
3481 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3482 		       const struct ib_recv_wr **bad_wr)
3483 {
3484 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3485 	struct qedr_srq_hwq_info *hw_srq;
3486 	struct qedr_dev *dev = srq->dev;
3487 	struct qed_chain *pbl;
3488 	unsigned long flags;
3489 	int status = 0;
3490 	u32 num_sge;
3491 	u32 offset;
3492 
3493 	spin_lock_irqsave(&srq->lock, flags);
3494 
3495 	hw_srq = &srq->hw_srq;
3496 	pbl = &srq->hw_srq.pbl;
3497 	while (wr) {
3498 		struct rdma_srq_wqe_header *hdr;
3499 		int i;
3500 
3501 		if (!qedr_srq_elem_left(hw_srq) ||
3502 		    wr->num_sge > srq->hw_srq.max_sges) {
3503 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3504 			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3505 			       wr->num_sge, srq->hw_srq.max_sges);
3506 			status = -ENOMEM;
3507 			*bad_wr = wr;
3508 			break;
3509 		}
3510 
3511 		hdr = qed_chain_produce(pbl);
3512 		num_sge = wr->num_sge;
3513 		/* Set number of sge and work request id in header */
3514 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3515 
3516 		srq->hw_srq.wr_prod_cnt++;
3517 		hw_srq->wqe_prod++;
3518 		hw_srq->sge_prod++;
3519 
3520 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3521 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3522 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3523 
3524 		for (i = 0; i < wr->num_sge; i++) {
3525 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3526 
3527 			/* Set SGE length, lkey and address */
3528 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3529 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3530 
3531 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3532 				 "[%d]: len %d key %x addr %x:%x\n",
3533 				 i, srq_sge->length, srq_sge->l_key,
3534 				 srq_sge->addr.hi, srq_sge->addr.lo);
3535 			hw_srq->sge_prod++;
3536 		}
3537 
3538 		/* Flush WQE and SGE information before
3539 		 * updating producer.
3540 		 */
3541 		wmb();
3542 
3543 		/* The SRQ producer is 8 bytes: the SGE producer index is
3544 		 * updated in the first 4 bytes and the WQE producer in the
3545 		 * next 4 bytes.
3546 		 */
3547 		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3548 		offset = offsetof(struct rdma_srq_producers, wqe_prod);
3549 		*((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
3550 			hw_srq->wqe_prod;
3551 
3552 		/* Flush producer after updating it. */
3553 		wmb();
3554 		wr = wr->next;
3555 	}
3556 
3557 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3558 		 qed_chain_get_elem_left(pbl));
3559 	spin_unlock_irqrestore(&srq->lock, flags);
3560 
3561 	return status;
3562 }
3563 
3564 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3565 		   const struct ib_recv_wr **bad_wr)
3566 {
3567 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3568 	struct qedr_dev *dev = qp->dev;
3569 	unsigned long flags;
3570 	int status = 0;
3571 
3572 	if (qp->qp_type == IB_QPT_GSI)
3573 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3574 
3575 	spin_lock_irqsave(&qp->q_lock, flags);
3576 
3577 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3578 		spin_unlock_irqrestore(&qp->q_lock, flags);
3579 		*bad_wr = wr;
3580 		return -EINVAL;
3581 	}
3582 
3583 	while (wr) {
3584 		int i;
3585 
3586 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3587 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3588 		    wr->num_sge > qp->rq.max_sges) {
3589 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3590 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3591 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3592 			       qp->rq.max_sges);
3593 			status = -ENOMEM;
3594 			*bad_wr = wr;
3595 			break;
3596 		}
3597 		for (i = 0; i < wr->num_sge; i++) {
3598 			u32 flags = 0;
3599 			struct rdma_rq_sge *rqe =
3600 			    qed_chain_produce(&qp->rq.pbl);
3601 
3602 			/* The first SGE must include the number
3603 			 * of SGEs in the list
3604 			 */
3605 			if (!i)
3606 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3607 					  wr->num_sge);
3608 
3609 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3610 				  wr->sg_list[i].lkey);
3611 
3612 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3613 				   wr->sg_list[i].length, flags);
3614 		}
3615 
3616 		/* Special case of no SGEs. FW requires between 1-4 SGEs; in
3617 		 * this case we need to post one SGE with length zero. This is
3618 		 * because an RDMA write with immediate consumes an RQ entry.
3619 		 */
3620 		if (!wr->num_sge) {
3621 			u32 flags = 0;
3622 			struct rdma_rq_sge *rqe =
3623 			    qed_chain_produce(&qp->rq.pbl);
3624 
3625 			/* The first (and only) SGE must still carry the number
3626 			 * of SGEs in the list
3627 			 */
3628 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3629 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3630 
3631 			RQ_SGE_SET(rqe, 0, 0, flags);
3632 			i = 1;
3633 		}
3634 
3635 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3636 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3637 
3638 		qedr_inc_sw_prod(&qp->rq);
3639 
3640 		/* qp->rqe_wr_id is accessed during qedr_poll_cq. As soon as
3641 		 * we give the doorbell, we could get a completion for this
3642 		 * WR, therefore we need to make sure that the memory is
3643 		 * updated before giving the doorbell.
3644 		 * During qedr_poll_cq, rmb is called before accessing the
3645 		 * cqe. This covers for the smp_rmb as well.
3646 		 */
3647 		smp_wmb();
3648 
3649 		qp->rq.db_data.data.value++;
3650 
3651 		writel(qp->rq.db_data.raw, qp->rq.db);
3652 
3653 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
3654 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3656 
3657 		wr = wr->next;
3658 	}
3659 
3660 	spin_unlock_irqrestore(&qp->q_lock, flags);
3661 
3662 	return status;
3663 }
3664 
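/* A CQE is valid (written by firmware and not yet processed) when its
 * toggle bit matches the CQ's current PBL toggle value.
 */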
3665 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3666 {
3667 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3668 
3669 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3670 		cq->pbl_toggle;
3671 }
3672 
3673 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3674 {
3675 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3676 	struct qedr_qp *qp;
3677 
3678 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3679 						   resp_cqe->qp_handle.lo,
3680 						   u64);
3681 	return qp;
3682 }
3683 
3684 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3685 {
3686 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3687 
3688 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3689 }
3690 
3691 /* Return latest CQE (needs processing) */
3692 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3693 {
3694 	return cq->latest_cqe;
3695 }
3696 
3697 /* For FMR we need to increase the FMR completed counter used by the FMR
3698  * algorithm to determine whether a PBL can be freed or not.
3699  * We need to do this whether the work request was signaled or not. For
3700  * this purpose we call this function from the condition that checks if a WR
3701  * should be skipped, to make sure we don't miss it (possibly this FMR
3702  * operation was not signaled).
3703  */
3704 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3705 {
3706 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3707 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3708 }
3709 
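/* Walk the SQ up to the consumer index reported in @hw_cons and build
 * a work completion for every signaled WR (or for every WR when @force
 * is set, e.g. while flushing). Unsignaled WRs are skipped but their
 * SQ elements are still consumed. Returns the number of entries
 * written to @wc.
 */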
3710 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3711 		       struct qedr_cq *cq, int num_entries,
3712 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3713 		       int force)
3714 {
3715 	u16 cnt = 0;
3716 
3717 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3718 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3719 			qedr_chk_if_fmr(qp);
3720 			/* skip WC */
3721 			goto next_cqe;
3722 		}
3723 
3724 		/* fill WC */
3725 		wc->status = status;
3726 		wc->vendor_err = 0;
3727 		wc->wc_flags = 0;
3728 		wc->src_qp = qp->id;
3729 		wc->qp = &qp->ibqp;
3730 
3731 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3732 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3733 
3734 		switch (wc->opcode) {
3735 		case IB_WC_RDMA_WRITE:
3736 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3737 			break;
3738 		case IB_WC_COMP_SWAP:
3739 		case IB_WC_FETCH_ADD:
3740 			wc->byte_len = 8;
3741 			break;
3742 		case IB_WC_REG_MR:
3743 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3744 			break;
3745 		case IB_WC_RDMA_READ:
3746 		case IB_WC_SEND:
3747 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3748 			break;
3749 		default:
3750 			break;
3751 		}
3752 
3753 		num_entries--;
3754 		wc++;
3755 		cnt++;
3756 next_cqe:
3757 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3758 			qed_chain_consume(&qp->sq.pbl);
3759 		qedr_inc_sw_cons(&qp->sq);
3760 	}
3761 
3762 	return cnt;
3763 }
3764 
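/* Handle a requester CQE. On success or flush the SQ is drained up to
 * the consumer index carried by the CQE; on any other error the
 * preceding WRs are completed successfully and, if there is room left
 * in @wc, one extra entry carries the translated error status.
 */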
3765 static int qedr_poll_cq_req(struct qedr_dev *dev,
3766 			    struct qedr_qp *qp, struct qedr_cq *cq,
3767 			    int num_entries, struct ib_wc *wc,
3768 			    struct rdma_cqe_requester *req)
3769 {
3770 	int cnt = 0;
3771 
3772 	switch (req->status) {
3773 	case RDMA_CQE_REQ_STS_OK:
3774 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3775 				  IB_WC_SUCCESS, 0);
3776 		break;
3777 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3778 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3779 			DP_DEBUG(dev, QEDR_MSG_CQ,
3780 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3781 				 cq->icid, qp->icid);
3782 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3783 				  IB_WC_WR_FLUSH_ERR, 1);
3784 		break;
3785 	default:
3786 		/* process all WQEs before the consumer */
3787 		qp->state = QED_ROCE_QP_STATE_ERR;
3788 		cnt = process_req(dev, qp, cq, num_entries, wc,
3789 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3790 		wc += cnt;
3791 		/* if we have extra WC fill it with actual error info */
3792 		if (cnt < num_entries) {
3793 			enum ib_wc_status wc_status;
3794 
3795 			switch (req->status) {
3796 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3797 				DP_ERR(dev,
3798 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3799 				       cq->icid, qp->icid);
3800 				wc_status = IB_WC_BAD_RESP_ERR;
3801 				break;
3802 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3803 				DP_ERR(dev,
3804 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3805 				       cq->icid, qp->icid);
3806 				wc_status = IB_WC_LOC_LEN_ERR;
3807 				break;
3808 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3809 				DP_ERR(dev,
3810 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3811 				       cq->icid, qp->icid);
3812 				wc_status = IB_WC_LOC_QP_OP_ERR;
3813 				break;
3814 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3815 				DP_ERR(dev,
3816 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3817 				       cq->icid, qp->icid);
3818 				wc_status = IB_WC_LOC_PROT_ERR;
3819 				break;
3820 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3821 				DP_ERR(dev,
3822 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3823 				       cq->icid, qp->icid);
3824 				wc_status = IB_WC_MW_BIND_ERR;
3825 				break;
3826 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3827 				DP_ERR(dev,
3828 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3829 				       cq->icid, qp->icid);
3830 				wc_status = IB_WC_REM_INV_REQ_ERR;
3831 				break;
3832 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3833 				DP_ERR(dev,
3834 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3835 				       cq->icid, qp->icid);
3836 				wc_status = IB_WC_REM_ACCESS_ERR;
3837 				break;
3838 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3839 				DP_ERR(dev,
3840 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3841 				       cq->icid, qp->icid);
3842 				wc_status = IB_WC_REM_OP_ERR;
3843 				break;
3844 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3845 				DP_ERR(dev,
3846 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3847 				       cq->icid, qp->icid);
3848 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3849 				break;
3850 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3851 				DP_ERR(dev,
3852 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3853 				       cq->icid, qp->icid);
3854 				wc_status = IB_WC_RETRY_EXC_ERR;
3855 				break;
3856 			default:
3857 				DP_ERR(dev,
3858 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3859 				       cq->icid, qp->icid);
3860 				wc_status = IB_WC_GENERAL_ERR;
3861 			}
3862 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3863 					   wc_status, 1);
3864 		}
3865 	}
3866 
3867 	return cnt;
3868 }
3869 
3870 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3871 {
3872 	switch (status) {
3873 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3874 		return IB_WC_LOC_ACCESS_ERR;
3875 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3876 		return IB_WC_LOC_LEN_ERR;
3877 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3878 		return IB_WC_LOC_QP_OP_ERR;
3879 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3880 		return IB_WC_LOC_PROT_ERR;
3881 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3882 		return IB_WC_MW_BIND_ERR;
3883 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3884 		return IB_WC_REM_INV_RD_REQ_ERR;
3885 	case RDMA_CQE_RESP_STS_OK:
3886 		return IB_WC_SUCCESS;
3887 	default:
3888 		return IB_WC_GENERAL_ERR;
3889 	}
3890 }
3891 
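/* Fill a work completion from a successful responder CQE. The valid
 * flag combinations are: no flags, IMM, IMM+RDMA (RDMA write with
 * immediate) and INV; anything else (including RDMA on its own) makes
 * this return -EINVAL so the caller can report a malformed CQE.
 */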
3892 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3893 					  struct ib_wc *wc)
3894 {
3895 	wc->status = IB_WC_SUCCESS;
3896 	wc->byte_len = le32_to_cpu(resp->length);
3897 
3898 	if (resp->flags & QEDR_RESP_IMM) {
3899 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3900 		wc->wc_flags |= IB_WC_WITH_IMM;
3901 
3902 		if (resp->flags & QEDR_RESP_RDMA)
3903 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3904 
3905 		if (resp->flags & QEDR_RESP_INV)
3906 			return -EINVAL;
3907 
3908 	} else if (resp->flags & QEDR_RESP_INV) {
3909 		wc->ex.invalidate_rkey = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3910 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3911 
3912 		if (resp->flags & QEDR_RESP_RDMA)
3913 			return -EINVAL;
3914 
3915 	} else if (resp->flags & QEDR_RESP_RDMA) {
3916 		return -EINVAL;
3917 	}
3918 
3919 	return 0;
3920 }
3921 
3922 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3923 			       struct qedr_cq *cq, struct ib_wc *wc,
3924 			       struct rdma_cqe_responder *resp, u64 wr_id)
3925 {
3926 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3927 	wc->opcode = IB_WC_RECV;
3928 	wc->wc_flags = 0;
3929 
3930 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3931 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3932 			DP_ERR(dev,
3933 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3934 			       cq, cq->icid, resp->flags);
3935 
3936 	} else {
3937 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3938 		if (wc->status == IB_WC_GENERAL_ERR)
3939 			DP_ERR(dev,
3940 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3941 			       cq, cq->icid, resp->status);
3942 	}
3943 
3944 	/* Fill the rest of the WC */
3945 	wc->vendor_err = 0;
3946 	wc->src_qp = qp->id;
3947 	wc->qp = &qp->ibqp;
3948 	wc->wr_id = wr_id;
3949 }
3950 
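/* Responder completion on an SRQ: the wr_id is carried inside the CQE
 * itself (srq_wr_id) instead of being looked up in a per-QP ring.
 * Flushed WRs get a minimal IB_WC_WR_FLUSH_ERR completion; everything
 * else is filled in by __process_resp_one(). The SRQ consumer count is
 * advanced in both cases.
 */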
3951 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
3952 				struct qedr_cq *cq, struct ib_wc *wc,
3953 				struct rdma_cqe_responder *resp)
3954 {
3955 	struct qedr_srq *srq = qp->srq;
3956 	u64 wr_id;
3957 
3958 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
3959 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
3960 
3961 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3962 		wc->status = IB_WC_WR_FLUSH_ERR;
3963 		wc->vendor_err = 0;
3964 		wc->wr_id = wr_id;
3965 		wc->byte_len = 0;
3966 		wc->src_qp = qp->id;
3967 		wc->qp = &qp->ibqp;
3969 	} else {
3970 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3971 	}
3972 	srq->hw_srq.wr_cons_cnt++;
3973 
3974 	return 1;
3975 }
3976 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3977 			    struct qedr_cq *cq, struct ib_wc *wc,
3978 			    struct rdma_cqe_responder *resp)
3979 {
3980 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3981 
3982 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3983 
3984 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3985 		qed_chain_consume(&qp->rq.pbl);
3986 	qedr_inc_sw_cons(&qp->rq);
3987 
3988 	return 1;
3989 }
3990 
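/* Flush the RQ: complete every outstanding receive WR up to @hw_cons
 * with IB_WC_WR_FLUSH_ERR while consuming the matching RQ PBL
 * elements. Returns the number of entries written to @wc.
 */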
3991 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3992 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3993 {
3994 	u16 cnt = 0;
3995 
3996 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3997 		/* fill WC */
3998 		wc->status = IB_WC_WR_FLUSH_ERR;
3999 		wc->vendor_err = 0;
4000 		wc->wc_flags = 0;
4001 		wc->src_qp = qp->id;
4002 		wc->byte_len = 0;
4003 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4004 		wc->qp = &qp->ibqp;
4005 		num_entries--;
4006 		wc++;
4007 		cnt++;
4008 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4009 			qed_chain_consume(&qp->rq.pbl);
4010 		qedr_inc_sw_cons(&qp->rq);
4011 	}
4012 
4013 	return cnt;
4014 }
4015 
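/* A responder CQE is only consumed once the RQ consumer index it
 * reports has been reached by the software consumer; otherwise the
 * same CQE is revisited on the next poll.
 */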
4016 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4017 				 struct rdma_cqe_responder *resp, int *update)
4018 {
4019 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4020 		consume_cqe(cq);
4021 		*update |= 1;
4022 	}
4023 }
4024 
4025 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4026 				 struct qedr_cq *cq, int num_entries,
4027 				 struct ib_wc *wc,
4028 				 struct rdma_cqe_responder *resp)
4029 {
4030 	int cnt;
4031 
4032 	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4033 	consume_cqe(cq);
4034 
4035 	return cnt;
4036 }
4037 
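/* Handle a responder (RQ) CQE: a flushed CQE completes all pending
 * receives up to the reported consumer index, while an ordinary CQE
 * completes exactly one receive and consumes the CQE immediately.
 */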
4038 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4039 			     struct qedr_cq *cq, int num_entries,
4040 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4041 			     int *update)
4042 {
4043 	int cnt;
4044 
4045 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4046 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4047 					 resp->rq_cons_or_srq_id);
4048 		try_consume_resp_cqe(cq, qp, resp, update);
4049 	} else {
4050 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4051 		consume_cqe(cq);
4052 		*update |= 1;
4053 	}
4054 
4055 	return cnt;
4056 }
4057 
4058 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4059 				struct rdma_cqe_requester *req, int *update)
4060 {
4061 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4062 		consume_cqe(cq);
4063 		*update |= 1;
4064 	}
4065 }
4066 
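/* Poll up to @num_entries work completions from @ibcq into @wc. GSI
 * CQs are delegated to qedr_gsi_poll_cq(). Returns the number of
 * completions written; polling a CQ that was already destroyed only
 * logs a warning and returns 0.
 *
 * Typical ULP-side polling loop (an illustrative sketch only;
 * handle_completion() is a placeholder, not a function defined in
 * this driver):
 *
 *	struct ib_wc wc[16];
 *	int n, i;
 *
 *	while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0)
 *		for (i = 0; i < n; i++)
 *			handle_completion(&wc[i]);
 */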
4067 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4068 {
4069 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4070 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4071 	union rdma_cqe *cqe;
4072 	u32 old_cons, new_cons;
4073 	unsigned long flags;
4074 	int update = 0;
4075 	int done = 0;
4076 
4077 	if (cq->destroyed) {
4078 		DP_ERR(dev,
4079 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4080 		       cq, cq->icid);
4081 		return 0;
4082 	}
4083 
4084 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4085 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4086 
4087 	spin_lock_irqsave(&cq->cq_lock, flags);
4088 	cqe = cq->latest_cqe;
4089 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4090 	while (num_entries && is_valid_cqe(cq, cqe)) {
4091 		struct qedr_qp *qp;
4092 		int cnt = 0;
4093 
4094 		/* prevent speculative reads of any field of CQE */
4095 		rmb();
4096 
4097 		qp = cqe_get_qp(cqe);
4098 		if (!qp) {
4099 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4100 			break;
4101 		}
4102 
4103 		wc->qp = &qp->ibqp;
4104 
4105 		switch (cqe_get_type(cqe)) {
4106 		case RDMA_CQE_TYPE_REQUESTER:
4107 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4108 					       &cqe->req);
4109 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4110 			break;
4111 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4112 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4113 						&cqe->resp, &update);
4114 			break;
4115 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4116 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4117 						    wc, &cqe->resp);
4118 			update = 1;
4119 			break;
4120 		case RDMA_CQE_TYPE_INVALID:
4121 		default:
4122 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4123 			       cqe_get_type(cqe));
4124 		}
4125 		num_entries -= cnt;
4126 		wc += cnt;
4127 		done += cnt;
4128 
4129 		cqe = get_cqe(cq);
4130 	}
4131 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4132 
4133 	cq->cq_cons += new_cons - old_cons;
4134 
4135 	if (update)
4136 		/* The doorbell notifies about the latest VALID entry,
4137 		 * but the chain already points to the next INVALID one
4138 		 */
4139 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4140 
4141 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4142 	return done;
4143 }
4144 
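/* MAD processing stub: qedr does not parse or answer MADs. The
 * incoming MAD header is only logged and IB_MAD_RESULT_SUCCESS is
 * returned without generating a reply.
 */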
4145 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4146 		     u8 port_num,
4147 		     const struct ib_wc *in_wc,
4148 		     const struct ib_grh *in_grh,
4149 		     const struct ib_mad_hdr *mad_hdr,
4150 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
4151 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4152 {
4153 	struct qedr_dev *dev = get_qedr_dev(ibdev);
4154 
4155 	DP_DEBUG(dev, QEDR_MSG_GSI,
4156 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
4157 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
4158 		 mad_hdr->class_specific, mad_hdr->class_version,
4159 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
4160 	return IB_MAD_RESULT_SUCCESS;
4161 }
4162