xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 1c2dd16a)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_cm.h"
53 
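/* DB_ADDR_SHIFT() scales one of the DQ_PWM_OFFSET_* doorbell indices (the
 * shift constant comes from the qed HSI headers, outside this file) into the
 * doorbell offset that is handed to user space or added to dev->db_addr
 * below; it is used for the CQ consumer doorbell and for the SQ/RQ producer
 * doorbells.
 */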
54 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 
56 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
57 {
58 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
59 		return -EINVAL;
60 
61 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
62 	return 0;
63 }
64 
65 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
66 		   union ib_gid *sgid)
67 {
68 	struct qedr_dev *dev = get_qedr_dev(ibdev);
69 	int rc = 0;
70 
71 	if (!rdma_cap_roce_gid_table(ibdev, port))
72 		return -ENODEV;
73 
74 	rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
75 	if (rc == -EAGAIN) {
76 		memcpy(sgid, &zgid, sizeof(*sgid));
77 		return 0;
78 	}
79 
80 	DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
81 		 sgid->global.interface_id, sgid->global.subnet_prefix);
82 
83 	return rc;
84 }
85 
86 int qedr_add_gid(struct ib_device *device, u8 port_num,
87 		 unsigned int index, const union ib_gid *gid,
88 		 const struct ib_gid_attr *attr, void **context)
89 {
90 	if (!rdma_cap_roce_gid_table(device, port_num))
91 		return -EINVAL;
92 
93 	if (port_num > QEDR_MAX_PORT)
94 		return -EINVAL;
95 
96 	if (!context)
97 		return -EINVAL;
98 
99 	return 0;
100 }
101 
102 int qedr_del_gid(struct ib_device *device, u8 port_num,
103 		 unsigned int index, void **context)
104 {
105 	if (!rdma_cap_roce_gid_table(device, port_num))
106 		return -EINVAL;
107 
108 	if (port_num > QEDR_MAX_PORT)
109 		return -EINVAL;
110 
111 	if (!context)
112 		return -EINVAL;
113 
114 	return 0;
115 }
116 
117 int qedr_query_device(struct ib_device *ibdev,
118 		      struct ib_device_attr *attr, struct ib_udata *udata)
119 {
120 	struct qedr_dev *dev = get_qedr_dev(ibdev);
121 	struct qedr_device_attr *qattr = &dev->attr;
122 
123 	if (!dev->rdma_ctx) {
124 		DP_ERR(dev,
125 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
126 		       dev->rdma_ctx);
127 		return -EINVAL;
128 	}
129 
130 	memset(attr, 0, sizeof(*attr));
131 
132 	attr->fw_ver = qattr->fw_ver;
133 	attr->sys_image_guid = qattr->sys_image_guid;
134 	attr->max_mr_size = qattr->max_mr_size;
135 	attr->page_size_cap = qattr->page_size_caps;
136 	attr->vendor_id = qattr->vendor_id;
137 	attr->vendor_part_id = qattr->vendor_part_id;
138 	attr->hw_ver = qattr->hw_ver;
139 	attr->max_qp = qattr->max_qp;
140 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
141 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
142 	    IB_DEVICE_RC_RNR_NAK_GEN |
143 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
144 
145 	attr->max_sge = qattr->max_sge;
146 	attr->max_sge_rd = qattr->max_sge;
147 	attr->max_cq = qattr->max_cq;
148 	attr->max_cqe = qattr->max_cqe;
149 	attr->max_mr = qattr->max_mr;
150 	attr->max_mw = qattr->max_mw;
151 	attr->max_pd = qattr->max_pd;
152 	attr->atomic_cap = dev->atomic_cap;
153 	attr->max_fmr = qattr->max_fmr;
154 	attr->max_map_per_fmr = 16;
155 	attr->max_qp_init_rd_atom =
156 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
157 	attr->max_qp_rd_atom =
158 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
159 		attr->max_qp_init_rd_atom);
160 
161 	attr->max_srq = qattr->max_srq;
162 	attr->max_srq_sge = qattr->max_srq_sge;
163 	attr->max_srq_wr = qattr->max_srq_wr;
164 
165 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
166 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
167 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
168 	attr->max_ah = qattr->max_ah;
169 
170 	return 0;
171 }
172 
173 #define QEDR_SPEED_SDR		(1)
174 #define QEDR_SPEED_DDR		(2)
175 #define QEDR_SPEED_QDR		(4)
176 #define QEDR_SPEED_FDR10	(8)
177 #define QEDR_SPEED_FDR		(16)
178 #define QEDR_SPEED_EDR		(32)
179 
180 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
181 					    u8 *ib_width)
182 {
183 	switch (speed) {
184 	case 1000:
185 		*ib_speed = QEDR_SPEED_SDR;
186 		*ib_width = IB_WIDTH_1X;
187 		break;
188 	case 10000:
189 		*ib_speed = QEDR_SPEED_QDR;
190 		*ib_width = IB_WIDTH_1X;
191 		break;
192 
193 	case 20000:
194 		*ib_speed = QEDR_SPEED_DDR;
195 		*ib_width = IB_WIDTH_4X;
196 		break;
197 
198 	case 25000:
199 		*ib_speed = QEDR_SPEED_EDR;
200 		*ib_width = IB_WIDTH_1X;
201 		break;
202 
203 	case 40000:
204 		*ib_speed = QEDR_SPEED_QDR;
205 		*ib_width = IB_WIDTH_4X;
206 		break;
207 
208 	case 50000:
209 		*ib_speed = QEDR_SPEED_QDR;
210 		*ib_width = IB_WIDTH_4X;
211 		break;
212 
213 	case 100000:
214 		*ib_speed = QEDR_SPEED_EDR;
215 		*ib_width = IB_WIDTH_4X;
216 		break;
217 
218 	default:
219 		/* Unsupported */
220 		*ib_speed = QEDR_SPEED_SDR;
221 		*ib_width = IB_WIDTH_1X;
222 	}
223 }
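/* The table above reports the closest IB (speed, width) pair for each
 * Ethernet rate rather than an exact match; e.g. 50G has no exact IB
 * encoding here and is reported as QDR x4 (40G), and any unrecognized rate
 * falls back to SDR x1.
 */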
224 
225 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
226 {
227 	struct qedr_dev *dev;
228 	struct qed_rdma_port *rdma_port;
229 
230 	dev = get_qedr_dev(ibdev);
231 	if (port > 1) {
232 		DP_ERR(dev, "invalid_port=0x%x\n", port);
233 		return -EINVAL;
234 	}
235 
236 	if (!dev->rdma_ctx) {
237 		DP_ERR(dev, "rdma_ctx is NULL\n");
238 		return -EINVAL;
239 	}
240 
241 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
242 
243 	/* *attr is zeroed by the caller; avoid zeroing it again here */
244 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
245 		attr->state = IB_PORT_ACTIVE;
246 		attr->phys_state = 5;
247 	} else {
248 		attr->state = IB_PORT_DOWN;
249 		attr->phys_state = 3;
250 	}
251 	attr->max_mtu = IB_MTU_4096;
252 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
253 	attr->lid = 0;
254 	attr->lmc = 0;
255 	attr->sm_lid = 0;
256 	attr->sm_sl = 0;
257 	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
258 	attr->gid_tbl_len = QEDR_MAX_SGID;
259 	attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
260 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
261 	attr->qkey_viol_cntr = 0;
262 	get_link_speed_and_width(rdma_port->link_speed,
263 				 &attr->active_speed, &attr->active_width);
264 	attr->max_msg_sz = rdma_port->max_msg_size;
265 	attr->max_vl_num = 4;
266 
267 	return 0;
268 }
269 
270 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
271 		     struct ib_port_modify *props)
272 {
273 	struct qedr_dev *dev;
274 
275 	dev = get_qedr_dev(ibdev);
276 	if (port > 1) {
277 		DP_ERR(dev, "invalid_port=0x%x\n", port);
278 		return -EINVAL;
279 	}
280 
281 	return 0;
282 }
283 
284 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
285 			 unsigned long len)
286 {
287 	struct qedr_mm *mm;
288 
289 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
290 	if (!mm)
291 		return -ENOMEM;
292 
293 	mm->key.phy_addr = phy_addr;
294 	/* This function might be called with a length which is not a multiple
295 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
296 	 * forces this granularity by increasing the requested size if needed.
297 	 * When qedr_mmap is called, it will search the list with the updated
298 	 * length as a key. To prevent search failures, the length is rounded up
299 	 * in advance to PAGE_SIZE.
300 	 */
301 	mm->key.len = roundup(len, PAGE_SIZE);
302 	INIT_LIST_HEAD(&mm->entry);
303 
304 	mutex_lock(&uctx->mm_list_lock);
305 	list_add(&mm->entry, &uctx->mm_head);
306 	mutex_unlock(&uctx->mm_list_lock);
307 
308 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
309 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
310 		 (unsigned long long)mm->key.phy_addr,
311 		 (unsigned long)mm->key.len, uctx);
312 
313 	return 0;
314 }
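/* Example of the rounding above (assuming a 4K PAGE_SIZE): registering a
 * doorbell region with len = 0x880 stores key.len = 0x1000, so the later
 * qedr_mmap() lookup, which uses the PAGE_SIZE-granular vma length
 * (vm_end - vm_start), still finds the entry.
 */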
315 
316 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
317 			     unsigned long len)
318 {
319 	bool found = false;
320 	struct qedr_mm *mm;
321 
322 	mutex_lock(&uctx->mm_list_lock);
323 	list_for_each_entry(mm, &uctx->mm_head, entry) {
324 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
325 			continue;
326 
327 		found = true;
328 		break;
329 	}
330 	mutex_unlock(&uctx->mm_list_lock);
331 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
332 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
333 		 phy_addr, len, uctx, found);
334 
335 	return found;
336 }
337 
338 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
339 					struct ib_udata *udata)
340 {
341 	int rc;
342 	struct qedr_ucontext *ctx;
343 	struct qedr_alloc_ucontext_resp uresp;
344 	struct qedr_dev *dev = get_qedr_dev(ibdev);
345 	struct qed_rdma_add_user_out_params oparams;
346 
347 	if (!udata)
348 		return ERR_PTR(-EFAULT);
349 
350 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
351 	if (!ctx)
352 		return ERR_PTR(-ENOMEM);
353 
354 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
355 	if (rc) {
356 		DP_ERR(dev,
357 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size or closing unnecessary RoCE applications. To increase the number of DPIs consult the qedr readme\n",
358 		       rc);
359 		goto err;
360 	}
361 
362 	ctx->dpi = oparams.dpi;
363 	ctx->dpi_addr = oparams.dpi_addr;
364 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
365 	ctx->dpi_size = oparams.dpi_size;
366 	INIT_LIST_HEAD(&ctx->mm_head);
367 	mutex_init(&ctx->mm_list_lock);
368 
369 	memset(&uresp, 0, sizeof(uresp));
370 
371 	uresp.db_pa = ctx->dpi_phys_addr;
372 	uresp.db_size = ctx->dpi_size;
373 	uresp.max_send_wr = dev->attr.max_sqe;
374 	uresp.max_recv_wr = dev->attr.max_rqe;
375 	uresp.max_srq_wr = dev->attr.max_srq_wr;
376 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
377 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
378 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
379 	uresp.max_cqes = QEDR_MAX_CQES;
380 
381 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
382 	if (rc)
383 		goto err;
384 
385 	ctx->dev = dev;
386 
387 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
388 	if (rc)
389 		goto err;
390 
391 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
392 		 &ctx->ibucontext);
393 	return &ctx->ibucontext;
394 
395 err:
396 	kfree(ctx);
397 	return ERR_PTR(rc);
398 }
399 
400 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
401 {
402 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
403 	struct qedr_mm *mm, *tmp;
404 	int status = 0;
405 
406 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
407 		 uctx);
408 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
409 
410 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
411 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
412 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
413 			 mm->key.phy_addr, mm->key.len, uctx);
414 		list_del(&mm->entry);
415 		kfree(mm);
416 	}
417 
418 	kfree(uctx);
419 	return status;
420 }
421 
422 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
423 {
424 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
425 	struct qedr_dev *dev = get_qedr_dev(context->device);
426 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
427 	u64 unmapped_db = dev->db_phys_addr;
428 	unsigned long len = (vma->vm_end - vma->vm_start);
429 	int rc = 0;
430 	bool found;
431 
432 	DP_DEBUG(dev, QEDR_MSG_INIT,
433 		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
434 		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
435 	if (vma->vm_start & (PAGE_SIZE - 1)) {
436 		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
437 		       vma->vm_start);
438 		return -EINVAL;
439 	}
440 
441 	found = qedr_search_mmap(ucontext, vm_page, len);
442 	if (!found) {
443 		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
444 		       vma->vm_pgoff);
445 		return -EINVAL;
446 	}
447 
448 	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
449 
450 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
451 						     dev->db_size))) {
452 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
453 		if (vma->vm_flags & VM_READ) {
454 			DP_ERR(dev, "Trying to map doorbell bar for read\n");
455 			return -EPERM;
456 		}
457 
458 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
459 
460 		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
461 					PAGE_SIZE, vma->vm_page_prot);
462 	} else {
463 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
464 		rc = remap_pfn_range(vma, vma->vm_start,
465 				     vma->vm_pgoff, len, vma->vm_page_prot);
466 	}
467 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
468 	return rc;
469 }
470 
471 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
472 			    struct ib_ucontext *context, struct ib_udata *udata)
473 {
474 	struct qedr_dev *dev = get_qedr_dev(ibdev);
475 	struct qedr_pd *pd;
476 	u16 pd_id;
477 	int rc;
478 
479 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
480 		 (udata && context) ? "User Lib" : "Kernel");
481 
482 	if (!dev->rdma_ctx) {
483 		DP_ERR(dev, "invalid RDMA context\n");
484 		return ERR_PTR(-EINVAL);
485 	}
486 
487 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
488 	if (!pd)
489 		return ERR_PTR(-ENOMEM);
490 
491 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
492 	if (rc)
493 		goto err;
494 
495 	pd->pd_id = pd_id;
496 
497 	if (udata && context) {
498 		struct qedr_alloc_pd_uresp uresp;
499 
500 		uresp.pd_id = pd_id;
501 
502 		rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
503 		if (rc) {
504 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
505 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
506 			goto err;
507 		}
508 
509 		pd->uctx = get_qedr_ucontext(context);
510 		pd->uctx->pd = pd;
511 	}
512 
513 	return &pd->ibpd;
514 
515 err:
516 	kfree(pd);
517 	return ERR_PTR(rc);
518 }
519 
520 int qedr_dealloc_pd(struct ib_pd *ibpd)
521 {
522 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
523 	struct qedr_pd *pd = get_qedr_pd(ibpd);
524 
525 	if (!pd) {
526 		pr_err("Invalid PD received in dealloc_pd\n");
527 		return -EINVAL;
528 	}
529 
530 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
531 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
532 
533 	kfree(pd);
534 
535 	return 0;
536 }
537 
538 static void qedr_free_pbl(struct qedr_dev *dev,
539 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
540 {
541 	struct pci_dev *pdev = dev->pdev;
542 	int i;
543 
544 	for (i = 0; i < pbl_info->num_pbls; i++) {
545 		if (!pbl[i].va)
546 			continue;
547 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
548 				  pbl[i].va, pbl[i].pa);
549 	}
550 
551 	kfree(pbl);
552 }
553 
554 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
555 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
556 
557 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
558 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
559 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
560 
561 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
562 					   struct qedr_pbl_info *pbl_info,
563 					   gfp_t flags)
564 {
565 	struct pci_dev *pdev = dev->pdev;
566 	struct qedr_pbl *pbl_table;
567 	dma_addr_t *pbl_main_tbl;
568 	dma_addr_t pa;
569 	void *va;
570 	int i;
571 
572 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
573 	if (!pbl_table)
574 		return ERR_PTR(-ENOMEM);
575 
576 	for (i = 0; i < pbl_info->num_pbls; i++) {
577 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
578 					&pa, flags);
579 		if (!va)
580 			goto err;
581 
582 		memset(va, 0, pbl_info->pbl_size);
583 		pbl_table[i].va = va;
584 		pbl_table[i].pa = pa;
585 	}
586 
587 	/* Two-layer PBLs: if we have more than one PBL, initialize the first
588 	 * one with physical pointers to all of the rest.
589 	 */
590 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
591 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
592 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
593 
594 	return pbl_table;
595 
596 err:
	/* Entries that were never allocated still have a NULL va (kcalloc), so
	 * a single qedr_free_pbl() call releases exactly the pages allocated
	 * so far; freeing them here first as well would free them twice,
	 * since their va pointers are left non-NULL.
	 */
601 	qedr_free_pbl(dev, pbl_info, pbl_table);
602 
603 	return ERR_PTR(-ENOMEM);
604 }
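/* Resulting layout for a two-layer allocation with num_pbls = N:
 *
 *   pbl_table[0].va      : [ pbl_table[1].pa, ..., pbl_table[N-1].pa ]
 *   pbl_table[1..N-1].va : the pages that will actually hold the PBEs
 *
 * For a single-layer allocation (num_pbls == 1) the loop above writes
 * nothing and pbl_table[0] itself holds the PBEs.
 */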
605 
606 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
607 				struct qedr_pbl_info *pbl_info,
608 				u32 num_pbes, int two_layer_capable)
609 {
610 	u32 pbl_capacity;
611 	u32 pbl_size;
612 	u32 num_pbls;
613 
614 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
615 		if (num_pbes > MAX_PBES_TWO_LAYER) {
616 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
617 			       num_pbes);
618 			return -EINVAL;
619 		}
620 
621 		/* calculate required pbl page size */
622 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
623 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
624 			       NUM_PBES_ON_PAGE(pbl_size);
625 
626 		while (pbl_capacity < num_pbes) {
627 			pbl_size *= 2;
628 			pbl_capacity = pbl_size / sizeof(u64);
629 			pbl_capacity = pbl_capacity * pbl_capacity;
630 		}
631 
632 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
633 		num_pbls++;	/* One more for layer 0, which points to the leaf PBLs */
634 		pbl_info->two_layered = true;
635 	} else {
636 		/* One layered PBL */
637 		num_pbls = 1;
638 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
639 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
640 		pbl_info->two_layered = false;
641 	}
642 
643 	pbl_info->num_pbls = num_pbls;
644 	pbl_info->pbl_size = pbl_size;
645 	pbl_info->num_pbes = num_pbes;
646 
647 	DP_DEBUG(dev, QEDR_MSG_MR,
648 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
649 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
650 
651 	return 0;
652 }
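/* Worked example (4K FW PBL pages and 8-byte PBEs, i.e. 512 PBEs per page
 * and a two-layer limit of 8192 * 8192 PBEs): for num_pbes = 300000 the
 * initial 4K page gives a capacity of 512 * 512 = 262144, which is too
 * small, so pbl_size grows to 8K (capacity 1024 * 1024). num_pbls is then
 * DIV_ROUND_UP(300000, 1024) + 1 = 294: one layer-0 page plus 293 leaves.
 */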
653 
654 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
655 			       struct qedr_pbl *pbl,
656 			       struct qedr_pbl_info *pbl_info)
657 {
658 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
659 	struct qedr_pbl *pbl_tbl;
660 	struct scatterlist *sg;
661 	struct regpair *pbe;
662 	int entry;
663 	u32 addr;
664 
665 	if (!pbl_info->num_pbes)
666 		return;
667 
668 	/* If we have a two-layered PBL, the first PBL points to the rest of
669 	 * the PBLs and the first data entry lies in the second PBL of the table.
670 	 */
671 	if (pbl_info->two_layered)
672 		pbl_tbl = &pbl[1];
673 	else
674 		pbl_tbl = pbl;
675 
676 	pbe = (struct regpair *)pbl_tbl->va;
677 	if (!pbe) {
678 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
679 		return;
680 	}
681 
682 	pbe_cnt = 0;
683 
684 	shift = ilog2(umem->page_size);
685 
686 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
687 		pages = sg_dma_len(sg) >> shift;
688 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
689 			/* store the page address in pbe */
690 			pbe->lo = cpu_to_le32(sg_dma_address(sg) +
691 					      umem->page_size * pg_cnt);
692 			addr = upper_32_bits(sg_dma_address(sg) +
693 					     umem->page_size * pg_cnt);
694 			pbe->hi = cpu_to_le32(addr);
695 			pbe_cnt++;
696 			total_num_pbes++;
697 			pbe++;
698 
699 			if (total_num_pbes == pbl_info->num_pbes)
700 				return;
701 
702 			/* If the current PBL is full of PBEs, move on
703 			 * to the next PBL.
704 			 */
705 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
706 				pbl_tbl++;
707 				pbe = (struct regpair *)pbl_tbl->va;
708 				pbe_cnt = 0;
709 			}
710 		}
711 	}
712 }
713 
714 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
715 			      struct qedr_cq *cq, struct ib_udata *udata)
716 {
717 	struct qedr_create_cq_uresp uresp;
718 	int rc;
719 
720 	memset(&uresp, 0, sizeof(uresp));
721 
722 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
723 	uresp.icid = cq->icid;
724 
725 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
726 	if (rc)
727 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
728 
729 	return rc;
730 }
731 
732 static void consume_cqe(struct qedr_cq *cq)
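/* consume_cqe() advances the chain and maintains cq->pbl_toggle: every time
 * the consumer passes the last element (toggle_cqe) the expected toggle
 * value is flipped, which is presumably what lets the polling code elsewhere
 * in this file tell freshly written CQEs from stale ones of the previous lap.
 */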
733 {
734 	if (cq->latest_cqe == cq->toggle_cqe)
735 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
736 
737 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
738 }
739 
740 static inline int qedr_align_cq_entries(int entries)
741 {
742 	u64 size, aligned_size;
743 
744 	/* We allocate an extra entry that we don't report to the FW. */
745 	size = (entries + 1) * QEDR_CQE_SIZE;
746 	aligned_size = ALIGN(size, PAGE_SIZE);
747 
748 	return aligned_size / QEDR_CQE_SIZE;
749 }
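/* Example, assuming a 32-byte CQE (sizeof(union rdma_cqe)) and a 4K page:
 * entries = 200 gives size = 201 * 32 = 6432, which is padded up to 8192,
 * so the function returns 256 CQEs; the extra entry is the one mentioned
 * above that is never reported to the FW.
 */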
750 
751 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
752 				       struct qedr_dev *dev,
753 				       struct qedr_userq *q,
754 				       u64 buf_addr, size_t buf_len,
755 				       int access, int dmasync)
756 {
757 	int page_cnt;
758 	int rc;
759 
760 	q->buf_addr = buf_addr;
761 	q->buf_len = buf_len;
762 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
763 	if (IS_ERR(q->umem)) {
764 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
765 		       PTR_ERR(q->umem));
766 		return PTR_ERR(q->umem);
767 	}
768 
769 	page_cnt = ib_umem_page_count(q->umem);
770 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
771 	if (rc)
772 		goto err0;
773 
774 	q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
775 	if (IS_ERR(q->pbl_tbl)) {
776 		rc = PTR_ERR(q->pbl_tbl);
777 		goto err0;
778 	}
779 
780 	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
781 
782 	return 0;
783 
784 err0:
785 	ib_umem_release(q->umem);
786 
787 	return rc;
788 }
789 
790 static inline void qedr_init_cq_params(struct qedr_cq *cq,
791 				       struct qedr_ucontext *ctx,
792 				       struct qedr_dev *dev, int vector,
793 				       int chain_entries, int page_cnt,
794 				       u64 pbl_ptr,
795 				       struct qed_rdma_create_cq_in_params
796 				       *params)
797 {
798 	memset(params, 0, sizeof(*params));
799 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
800 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
801 	params->cnq_id = vector;
802 	params->cq_size = chain_entries - 1;
803 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
804 	params->pbl_num_pages = page_cnt;
805 	params->pbl_ptr = pbl_ptr;
806 	params->pbl_two_level = 0;
807 }
808 
809 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
810 {
811 	/* Flush data before signalling doorbell */
812 	wmb();
813 	cq->db.data.agg_flags = flags;
814 	cq->db.data.value = cpu_to_le32(cons);
815 	writeq(cq->db.raw, cq->db_addr);
816 
817 	/* Make sure write would stick */
818 	mmiowb();
819 }
820 
821 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
822 {
823 	struct qedr_cq *cq = get_qedr_cq(ibcq);
824 	unsigned long sflags;
825 
826 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
827 		return 0;
828 
829 	spin_lock_irqsave(&cq->cq_lock, sflags);
830 
831 	cq->arm_flags = 0;
832 
833 	if (flags & IB_CQ_SOLICITED)
834 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
835 
836 	if (flags & IB_CQ_NEXT_COMP)
837 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
838 
839 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
840 
841 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
842 
843 	return 0;
844 }
845 
846 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
847 			     const struct ib_cq_init_attr *attr,
848 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
849 {
850 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
851 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
852 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
853 	struct qedr_dev *dev = get_qedr_dev(ibdev);
854 	struct qed_rdma_create_cq_in_params params;
855 	struct qedr_create_cq_ureq ureq;
856 	int vector = attr->comp_vector;
857 	int entries = attr->cqe;
858 	struct qedr_cq *cq;
859 	int chain_entries;
860 	int page_cnt;
861 	u64 pbl_ptr;
862 	u16 icid;
863 	int rc;
864 
865 	DP_DEBUG(dev, QEDR_MSG_INIT,
866 		 "create_cq: called from %s. entries=%d, vector=%d\n",
867 		 udata ? "User Lib" : "Kernel", entries, vector);
868 
869 	if (entries > QEDR_MAX_CQES) {
870 		DP_ERR(dev,
871 		       "create cq: the number of entries %d is too high. Must be equal to or less than %d.\n",
872 		       entries, QEDR_MAX_CQES);
873 		return ERR_PTR(-EINVAL);
874 	}
875 
876 	chain_entries = qedr_align_cq_entries(entries);
877 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
878 
879 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
880 	if (!cq)
881 		return ERR_PTR(-ENOMEM);
882 
883 	if (udata) {
884 		memset(&ureq, 0, sizeof(ureq));
885 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
886 			DP_ERR(dev,
887 			       "create cq: problem copying data from user space\n");
888 			goto err0;
889 		}
890 
891 		if (!ureq.len) {
892 			DP_ERR(dev,
893 			       "create cq: cannot create a cq with 0 entries\n");
894 			goto err0;
895 		}
896 
897 		cq->cq_type = QEDR_CQ_TYPE_USER;
898 
899 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
900 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
901 		if (rc)
902 			goto err0;
903 
904 		pbl_ptr = cq->q.pbl_tbl->pa;
905 		page_cnt = cq->q.pbl_info.num_pbes;
906 
907 		cq->ibcq.cqe = chain_entries;
908 	} else {
909 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
910 
911 		rc = dev->ops->common->chain_alloc(dev->cdev,
912 						   QED_CHAIN_USE_TO_CONSUME,
913 						   QED_CHAIN_MODE_PBL,
914 						   QED_CHAIN_CNT_TYPE_U32,
915 						   chain_entries,
916 						   sizeof(union rdma_cqe),
917 						   &cq->pbl);
918 		if (rc)
919 			goto err1;
920 
921 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
922 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
923 		cq->ibcq.cqe = cq->pbl.capacity;
924 	}
925 
926 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
927 			    pbl_ptr, &params);
928 
929 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
930 	if (rc)
931 		goto err2;
932 
933 	cq->icid = icid;
934 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
935 	spin_lock_init(&cq->cq_lock);
936 
937 	if (ib_ctx) {
938 		rc = qedr_copy_cq_uresp(dev, cq, udata);
939 		if (rc)
940 			goto err3;
941 	} else {
942 		/* Generate doorbell address. */
943 		cq->db_addr = dev->db_addr +
944 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
945 		cq->db.data.icid = cq->icid;
946 		cq->db.data.params = DB_AGG_CMD_SET <<
947 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
948 
949 		/* Point to the very last element; once we pass it, we toggle */
950 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
951 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
952 		cq->latest_cqe = NULL;
953 		consume_cqe(cq);
954 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
955 	}
956 
957 	DP_DEBUG(dev, QEDR_MSG_CQ,
958 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
959 		 cq->icid, cq, params.cq_size);
960 
961 	return &cq->ibcq;
962 
963 err3:
964 	destroy_iparams.icid = cq->icid;
965 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
966 				  &destroy_oparams);
967 err2:
968 	if (udata)
969 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
970 	else
971 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
972 err1:
973 	if (udata)
974 		ib_umem_release(cq->q.umem);
975 err0:
976 	kfree(cq);
977 	return ERR_PTR(-EINVAL);
978 }
979 
980 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
981 {
982 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
983 	struct qedr_cq *cq = get_qedr_cq(ibcq);
984 
985 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
986 
987 	return 0;
988 }
989 
990 int qedr_destroy_cq(struct ib_cq *ibcq)
991 {
992 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
993 	struct qed_rdma_destroy_cq_out_params oparams;
994 	struct qed_rdma_destroy_cq_in_params iparams;
995 	struct qedr_cq *cq = get_qedr_cq(ibcq);
996 
997 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d\n", cq->icid);
998 
999 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1000 	if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
1001 		int rc;
1002 
1003 		iparams.icid = cq->icid;
1004 		rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams,
1005 					       &oparams);
1006 		if (rc)
1007 			return rc;
1008 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1009 	}
1010 
1011 	if (ibcq->uobject && ibcq->uobject->context) {
1012 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1013 		ib_umem_release(cq->q.umem);
1014 	}
1015 
1016 	kfree(cq);
1017 
1018 	return 0;
1019 }
1020 
1021 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1022 					  struct ib_qp_attr *attr,
1023 					  int attr_mask,
1024 					  struct qed_rdma_modify_qp_in_params
1025 					  *qp_params)
1026 {
1027 	enum rdma_network_type nw_type;
1028 	struct ib_gid_attr gid_attr;
1029 	union ib_gid gid;
1030 	u32 ipv4_addr;
1031 	int rc = 0;
1032 	int i;
1033 
1034 	rc = ib_get_cached_gid(ibqp->device, attr->ah_attr.port_num,
1035 			       attr->ah_attr.grh.sgid_index, &gid, &gid_attr);
1036 	if (rc)
1037 		return rc;
1038 
1039 	if (!memcmp(&gid, &zgid, sizeof(gid)))
1040 		return -ENOENT;
1041 
1042 	if (gid_attr.ndev) {
1043 		qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1044 
1045 		dev_put(gid_attr.ndev);
1046 		nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1047 		switch (nw_type) {
1048 		case RDMA_NETWORK_IPV6:
1049 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1050 			       sizeof(qp_params->sgid));
1051 			memcpy(&qp_params->dgid.bytes[0],
1052 			       &attr->ah_attr.grh.dgid,
1053 			       sizeof(qp_params->dgid));
1054 			qp_params->roce_mode = ROCE_V2_IPV6;
1055 			SET_FIELD(qp_params->modify_flags,
1056 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1057 			break;
1058 		case RDMA_NETWORK_IB:
1059 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1060 			       sizeof(qp_params->sgid));
1061 			memcpy(&qp_params->dgid.bytes[0],
1062 			       &attr->ah_attr.grh.dgid,
1063 			       sizeof(qp_params->dgid));
1064 			qp_params->roce_mode = ROCE_V1;
1065 			break;
1066 		case RDMA_NETWORK_IPV4:
1067 			memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1068 			memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1069 			ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1070 			qp_params->sgid.ipv4_addr = ipv4_addr;
1071 			ipv4_addr =
1072 			    qedr_get_ipv4_from_gid(attr->ah_attr.grh.dgid.raw);
1073 			qp_params->dgid.ipv4_addr = ipv4_addr;
1074 			SET_FIELD(qp_params->modify_flags,
1075 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1076 			qp_params->roce_mode = ROCE_V2_IPV4;
1077 			break;
1078 		}
1079 	}
1080 
1081 	for (i = 0; i < 4; i++) {
1082 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1083 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1084 	}
1085 
1086 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1087 		qp_params->vlan_id = 0;
1088 
1089 	return 0;
1090 }
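/* Example: for a RoCE v2 IPv4 GID (the ::ffff:a.b.c.d mapped form) the
 * branch above zeroes sgid/dgid, keeps only the 32-bit IPv4 addresses and
 * selects ROCE_V2_IPV4, while a RoCE v1 GID keeps the full 128-bit GIDs and
 * selects ROCE_V1.  In all cases the dwords are converted to host order and
 * a vlan_id of VLAN_CFI_MASK or above is treated as "no VLAN".
 */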
1091 
1092 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1093 			       struct ib_qp_init_attr *attrs)
1094 {
1095 	struct qedr_device_attr *qattr = &dev->attr;
1096 
1097 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1098 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1099 		DP_DEBUG(dev, QEDR_MSG_QP,
1100 			 "create qp: unsupported qp type=0x%x requested\n",
1101 			 attrs->qp_type);
1102 		return -EINVAL;
1103 	}
1104 
1105 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1106 		DP_ERR(dev,
1107 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1108 		       attrs->cap.max_send_wr, qattr->max_sqe);
1109 		return -EINVAL;
1110 	}
1111 
1112 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1113 		DP_ERR(dev,
1114 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1115 		       attrs->cap.max_inline_data, qattr->max_inline);
1116 		return -EINVAL;
1117 	}
1118 
1119 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1120 		DP_ERR(dev,
1121 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1122 		       attrs->cap.max_send_sge, qattr->max_sge);
1123 		return -EINVAL;
1124 	}
1125 
1126 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1127 		DP_ERR(dev,
1128 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1129 		       attrs->cap.max_recv_sge, qattr->max_sge);
1130 		return -EINVAL;
1131 	}
1132 
1133 	/* Unprivileged user space cannot create special QP */
1134 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1135 		DP_ERR(dev,
1136 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1137 		       attrs->qp_type);
1138 		return -EINVAL;
1139 	}
1140 
1141 	return 0;
1142 }
1143 
1144 static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
1145 			       struct qedr_qp *qp)
1146 {
1147 	uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1148 	uresp->rq_icid = qp->icid;
1149 }
1150 
1151 static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
1152 			       struct qedr_qp *qp)
1153 {
1154 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1155 	uresp->sq_icid = qp->icid + 1;
1156 }
1157 
1158 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1159 			      struct qedr_qp *qp, struct ib_udata *udata)
1160 {
1161 	struct qedr_create_qp_uresp uresp;
1162 	int rc;
1163 
1164 	memset(&uresp, 0, sizeof(uresp));
1165 	qedr_copy_sq_uresp(&uresp, qp);
1166 	qedr_copy_rq_uresp(&uresp, qp);
1167 
1168 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1169 	uresp.qp_id = qp->qp_id;
1170 
1171 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1172 	if (rc)
1173 		DP_ERR(dev,
1174 		       "create qp: failed to copy to user space with qp icid=0x%x.\n",
1175 		       qp->icid);
1176 
1177 	return rc;
1178 }
1179 
1180 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1181 				      struct qedr_qp *qp,
1182 				      struct qedr_pd *pd,
1183 				      struct ib_qp_init_attr *attrs)
1184 {
1185 	spin_lock_init(&qp->q_lock);
1186 	qp->pd = pd;
1187 	qp->qp_type = attrs->qp_type;
1188 	qp->max_inline_data = attrs->cap.max_inline_data;
1189 	qp->sq.max_sges = attrs->cap.max_send_sge;
1190 	qp->state = QED_ROCE_QP_STATE_RESET;
1191 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1192 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1193 	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1194 	qp->dev = dev;
1195 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1196 
1197 	DP_DEBUG(dev, QEDR_MSG_QP,
1198 		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1199 		 qp->rq.max_sges, qp->rq_cq->icid);
1200 	DP_DEBUG(dev, QEDR_MSG_QP,
1201 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1202 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1203 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1204 	DP_DEBUG(dev, QEDR_MSG_QP,
1205 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1206 		 qp->sq.max_sges, qp->sq_cq->icid);
1207 }
1208 
1209 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1210 {
1211 	qp->sq.db = dev->db_addr +
1212 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1213 	qp->sq.db_data.data.icid = qp->icid + 1;
1214 	qp->rq.db = dev->db_addr +
1215 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1216 	qp->rq.db_data.data.icid = qp->icid;
1217 }
1218 
1219 static inline void
1220 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1221 			      struct qedr_pd *pd,
1222 			      struct qedr_qp *qp,
1223 			      struct ib_qp_init_attr *attrs,
1224 			      bool fmr_and_reserved_lkey,
1225 			      struct qed_rdma_create_qp_in_params *params)
1226 {
1227 	/* QP handle to be written in an async event */
1228 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1229 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1230 
1231 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1232 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1233 	params->pd = pd->pd_id;
1234 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1235 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1236 	params->stats_queue = 0;
1237 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1238 	params->srq_id = 0;
1239 	params->use_srq = false;
1240 }
1241 
1242 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1243 {
1244 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1245 		 "qp=%p. "
1246 		 "sq_addr=0x%llx, "
1247 		 "sq_len=%zd, "
1248 		 "rq_addr=0x%llx, "
1249 		 "rq_len=%zd"
1250 		 "\n",
1251 		 qp,
1252 		 qp->usq.buf_addr,
1253 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1254 }
1255 
1256 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1257 {
1258 	if (qp->usq.umem)
1259 		ib_umem_release(qp->usq.umem);
1260 	qp->usq.umem = NULL;
1261 
1262 	if (qp->urq.umem)
1263 		ib_umem_release(qp->urq.umem);
1264 	qp->urq.umem = NULL;
1265 }
1266 
1267 static int qedr_create_user_qp(struct qedr_dev *dev,
1268 			       struct qedr_qp *qp,
1269 			       struct ib_pd *ibpd,
1270 			       struct ib_udata *udata,
1271 			       struct ib_qp_init_attr *attrs)
1272 {
1273 	struct qed_rdma_create_qp_in_params in_params;
1274 	struct qed_rdma_create_qp_out_params out_params;
1275 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1276 	struct ib_ucontext *ib_ctx = NULL;
1277 	struct qedr_ucontext *ctx = NULL;
1278 	struct qedr_create_qp_ureq ureq;
1279 	int rc = -EINVAL;
1280 
1281 	ib_ctx = ibpd->uobject->context;
1282 	ctx = get_qedr_ucontext(ib_ctx);
1283 
1284 	memset(&ureq, 0, sizeof(ureq));
1285 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1286 	if (rc) {
1287 		DP_ERR(dev, "Problem copying data from user space\n");
1288 		return rc;
1289 	}
1290 
1291 	/* SQ - read access only (0), dma sync not required (0) */
1292 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1293 				  ureq.sq_len, 0, 0);
1294 	if (rc)
1295 		return rc;
1296 
1297 	/* RQ - read access only (0), dma sync not required (0) */
1298 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1299 				  ureq.rq_len, 0, 0);
1300 
1301 	if (rc)
1302 		return rc;
1303 
1304 	memset(&in_params, 0, sizeof(in_params));
1305 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1306 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1307 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1308 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1309 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1310 	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1311 	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1312 
1313 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1314 					      &in_params, &out_params);
1315 
1316 	if (!qp->qed_qp) {
1317 		rc = -ENOMEM;
1318 		goto err1;
1319 	}
1320 
1321 	qp->qp_id = out_params.qp_id;
1322 	qp->icid = out_params.icid;
1323 
1324 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1325 	if (rc)
1326 		goto err;
1327 
1328 	qedr_qp_user_print(dev, qp);
1329 
1330 	return 0;
1331 err:
1332 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1333 	if (rc)
1334 		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1335 
1336 err1:
1337 	qedr_cleanup_user(dev, qp);
1338 	return rc;
1339 }
1340 
1341 static int
1342 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1343 			   struct qedr_qp *qp,
1344 			   struct qed_rdma_create_qp_in_params *in_params,
1345 			   u32 n_sq_elems, u32 n_rq_elems)
1346 {
1347 	struct qed_rdma_create_qp_out_params out_params;
1348 	int rc;
1349 
1350 	rc = dev->ops->common->chain_alloc(dev->cdev,
1351 					   QED_CHAIN_USE_TO_PRODUCE,
1352 					   QED_CHAIN_MODE_PBL,
1353 					   QED_CHAIN_CNT_TYPE_U32,
1354 					   n_sq_elems,
1355 					   QEDR_SQE_ELEMENT_SIZE,
1356 					   &qp->sq.pbl);
1357 
1358 	if (rc)
1359 		return rc;
1360 
1361 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1362 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1363 
1364 	rc = dev->ops->common->chain_alloc(dev->cdev,
1365 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1366 					   QED_CHAIN_MODE_PBL,
1367 					   QED_CHAIN_CNT_TYPE_U32,
1368 					   n_rq_elems,
1369 					   QEDR_RQE_ELEMENT_SIZE,
1370 					   &qp->rq.pbl);
1371 	if (rc)
1372 		return rc;
1373 
1374 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1375 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1376 
1377 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1378 					      in_params, &out_params);
1379 
1380 	if (!qp->qed_qp)
1381 		return -EINVAL;
1382 
1383 	qp->qp_id = out_params.qp_id;
1384 	qp->icid = out_params.icid;
1385 
1386 	qedr_set_roce_db_info(dev, qp);
1387 
1388 	return 0;
1389 }
1390 
1391 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1392 {
1393 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1394 	kfree(qp->wqe_wr_id);
1395 
1396 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1397 	kfree(qp->rqe_wr_id);
1398 }
1399 
1400 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1401 				 struct qedr_qp *qp,
1402 				 struct ib_pd *ibpd,
1403 				 struct ib_qp_init_attr *attrs)
1404 {
1405 	struct qed_rdma_create_qp_in_params in_params;
1406 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1407 	int rc = -EINVAL;
1408 	u32 n_rq_elems;
1409 	u32 n_sq_elems;
1410 	u32 n_sq_entries;
1411 
1412 	memset(&in_params, 0, sizeof(in_params));
1413 
1414 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1415 	 * the ring. The ring should allow at least a single WR, even if the
1416 	 * user requested none, due to allocation issues.
1417 	 * We should add an extra WR since the prod and cons indices of
1418 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1419 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1420 	 * double the number of entries due to an iSER issue that pushes far
1421 	 * more WRs than indicated. If we decline its ib_post_send() then we
1422 	 * get error prints in dmesg that we'd like to avoid.
1423 	 */
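	/* Example: with max_send_wr = 100 and a wq_multiplier of, say, 3 (the
	 * multiplier is set at device init, outside this file), the shadow
	 * array sq.max_wr becomes min(300, max_sqe) entries, while the
	 * hardware ring sized further below still uses the caller's 100 WRs
	 * times QEDR_MAX_SQE_ELEMENTS_PER_SQE elements.
	 */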
1424 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1425 			      dev->attr.max_sqe);
1426 
1427 	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
1428 				GFP_KERNEL);
1429 	if (!qp->wqe_wr_id) {
1430 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1431 		return -ENOMEM;
1432 	}
1433 
1434 	/* QP handle to be written in CQE */
1435 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1436 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1437 
1438 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1439 	 * the ring. The ring should allow at least a single WR, even if the
1440 	 * user requested none, due to allocation issues.
1441 	 */
1442 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1443 
1444 	/* Allocate driver internal RQ array */
1445 	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
1446 				GFP_KERNEL);
1447 	if (!qp->rqe_wr_id) {
1448 		DP_ERR(dev,
1449 		       "create qp: failed RQ shadow memory allocation\n");
1450 		kfree(qp->wqe_wr_id);
1451 		return -ENOMEM;
1452 	}
1453 
1454 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1455 
1456 	n_sq_entries = attrs->cap.max_send_wr;
1457 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1458 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1459 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1460 
1461 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1462 
1463 	rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1464 					n_sq_elems, n_rq_elems);
1465 	if (rc)
1466 		qedr_cleanup_kernel(dev, qp);
1467 
1468 	return rc;
1469 }
1470 
1471 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1472 			     struct ib_qp_init_attr *attrs,
1473 			     struct ib_udata *udata)
1474 {
1475 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1476 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1477 	struct qedr_qp *qp;
1478 	struct ib_qp *ibqp;
1479 	int rc = 0;
1480 
1481 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1482 		 udata ? "user library" : "kernel", pd);
1483 
1484 	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1485 	if (rc)
1486 		return ERR_PTR(rc);
1487 
1488 	if (attrs->srq)
1489 		return ERR_PTR(-EINVAL);
1490 
1491 	DP_DEBUG(dev, QEDR_MSG_QP,
1492 		 "create qp: called from %s, event_handler=%p, pd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1493 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1494 		 get_qedr_cq(attrs->send_cq),
1495 		 get_qedr_cq(attrs->send_cq)->icid,
1496 		 get_qedr_cq(attrs->recv_cq),
1497 		 get_qedr_cq(attrs->recv_cq)->icid);
1498 
1499 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1500 	if (!qp) {
1501 		DP_ERR(dev, "create qp: failed allocating memory\n");
1502 		return ERR_PTR(-ENOMEM);
1503 	}
1504 
1505 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1506 
1507 	if (attrs->qp_type == IB_QPT_GSI) {
1508 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1509 		if (IS_ERR(ibqp))
1510 			kfree(qp);
1511 		return ibqp;
1512 	}
1513 
1514 	if (udata)
1515 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1516 	else
1517 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1518 
1519 	if (rc)
1520 		goto err;
1521 
1522 	qp->ibqp.qp_num = qp->qp_id;
1523 
1524 	return &qp->ibqp;
1525 
1526 err:
1527 	kfree(qp);
1528 
1529 	return ERR_PTR(rc);
1530 }
1531 
1532 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1533 {
1534 	switch (qp_state) {
1535 	case QED_ROCE_QP_STATE_RESET:
1536 		return IB_QPS_RESET;
1537 	case QED_ROCE_QP_STATE_INIT:
1538 		return IB_QPS_INIT;
1539 	case QED_ROCE_QP_STATE_RTR:
1540 		return IB_QPS_RTR;
1541 	case QED_ROCE_QP_STATE_RTS:
1542 		return IB_QPS_RTS;
1543 	case QED_ROCE_QP_STATE_SQD:
1544 		return IB_QPS_SQD;
1545 	case QED_ROCE_QP_STATE_ERR:
1546 		return IB_QPS_ERR;
1547 	case QED_ROCE_QP_STATE_SQE:
1548 		return IB_QPS_SQE;
1549 	}
1550 	return IB_QPS_ERR;
1551 }
1552 
1553 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1554 					enum ib_qp_state qp_state)
1555 {
1556 	switch (qp_state) {
1557 	case IB_QPS_RESET:
1558 		return QED_ROCE_QP_STATE_RESET;
1559 	case IB_QPS_INIT:
1560 		return QED_ROCE_QP_STATE_INIT;
1561 	case IB_QPS_RTR:
1562 		return QED_ROCE_QP_STATE_RTR;
1563 	case IB_QPS_RTS:
1564 		return QED_ROCE_QP_STATE_RTS;
1565 	case IB_QPS_SQD:
1566 		return QED_ROCE_QP_STATE_SQD;
1567 	case IB_QPS_ERR:
1568 		return QED_ROCE_QP_STATE_ERR;
1569 	default:
1570 		return QED_ROCE_QP_STATE_ERR;
1571 	}
1572 }
1573 
1574 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1575 {
1576 	qed_chain_reset(&qph->pbl);
1577 	qph->prod = 0;
1578 	qph->cons = 0;
1579 	qph->wqe_cons = 0;
1580 	qph->db_data.data.value = cpu_to_le16(0);
1581 }
1582 
1583 static int qedr_update_qp_state(struct qedr_dev *dev,
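/* Software QP state machine, mirroring the switch below: RESET->INIT,
 * INIT->{RTR, ERR}, RTR->{RTS, ERR}, RTS->{SQD, ERR}, SQD->{RTS, ERR} and
 * ERR->RESET (the latter only once both work queues have drained); anything
 * else is rejected with -EINVAL.  The INIT->RTR arm also re-rings the RQ
 * doorbell in case buffers were posted before the transition.
 */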
1584 				struct qedr_qp *qp,
1585 				enum qed_roce_qp_state new_state)
1586 {
1587 	int status = 0;
1588 
1589 	if (new_state == qp->state)
1590 		return 0;
1591 
1592 	switch (qp->state) {
1593 	case QED_ROCE_QP_STATE_RESET:
1594 		switch (new_state) {
1595 		case QED_ROCE_QP_STATE_INIT:
1596 			qp->prev_wqe_size = 0;
1597 			qedr_reset_qp_hwq_info(&qp->sq);
1598 			qedr_reset_qp_hwq_info(&qp->rq);
1599 			break;
1600 		default:
1601 			status = -EINVAL;
1602 			break;
1603 		}
1604 		break;
1605 	case QED_ROCE_QP_STATE_INIT:
1606 		switch (new_state) {
1607 		case QED_ROCE_QP_STATE_RTR:
1608 			/* Update doorbell (in case post_recv was
1609 			 * done before move to RTR)
1610 			 */
1611 			wmb();
1612 			writel(qp->rq.db_data.raw, qp->rq.db);
1613 			/* Make sure write takes effect */
1614 			mmiowb();
1615 			break;
1616 		case QED_ROCE_QP_STATE_ERR:
1617 			break;
1618 		default:
1619 			/* Invalid state change. */
1620 			status = -EINVAL;
1621 			break;
1622 		}
1623 		break;
1624 	case QED_ROCE_QP_STATE_RTR:
1625 		/* RTR->XXX */
1626 		switch (new_state) {
1627 		case QED_ROCE_QP_STATE_RTS:
1628 			break;
1629 		case QED_ROCE_QP_STATE_ERR:
1630 			break;
1631 		default:
1632 			/* Invalid state change. */
1633 			status = -EINVAL;
1634 			break;
1635 		}
1636 		break;
1637 	case QED_ROCE_QP_STATE_RTS:
1638 		/* RTS->XXX */
1639 		switch (new_state) {
1640 		case QED_ROCE_QP_STATE_SQD:
1641 			break;
1642 		case QED_ROCE_QP_STATE_ERR:
1643 			break;
1644 		default:
1645 			/* Invalid state change. */
1646 			status = -EINVAL;
1647 			break;
1648 		}
1649 		break;
1650 	case QED_ROCE_QP_STATE_SQD:
1651 		/* SQD->XXX */
1652 		switch (new_state) {
1653 		case QED_ROCE_QP_STATE_RTS:
1654 		case QED_ROCE_QP_STATE_ERR:
1655 			break;
1656 		default:
1657 			/* Invalid state change. */
1658 			status = -EINVAL;
1659 			break;
1660 		}
1661 		break;
1662 	case QED_ROCE_QP_STATE_ERR:
1663 		/* ERR->XXX */
1664 		switch (new_state) {
1665 		case QED_ROCE_QP_STATE_RESET:
1666 			if ((qp->rq.prod != qp->rq.cons) ||
1667 			    (qp->sq.prod != qp->sq.cons)) {
1668 				DP_NOTICE(dev,
1669 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1670 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1671 					  qp->sq.cons);
1672 				status = -EINVAL;
1673 			}
1674 			break;
1675 		default:
1676 			status = -EINVAL;
1677 			break;
1678 		}
1679 		break;
1680 	default:
1681 		status = -EINVAL;
1682 		break;
1683 	}
1684 
1685 	return status;
1686 }
1687 
1688 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1689 		   int attr_mask, struct ib_udata *udata)
1690 {
1691 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1692 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1693 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1694 	enum ib_qp_state old_qp_state, new_qp_state;
1695 	int rc = 0;
1696 
1697 	DP_DEBUG(dev, QEDR_MSG_QP,
1698 		 "modify qp: qp %p attr_mask=0x%x, state=%d\n", qp, attr_mask,
1699 		 attr->qp_state);
1700 
1701 	old_qp_state = qedr_get_ibqp_state(qp->state);
1702 	if (attr_mask & IB_QP_STATE)
1703 		new_qp_state = attr->qp_state;
1704 	else
1705 		new_qp_state = old_qp_state;
1706 
1707 	if (!ib_modify_qp_is_ok
1708 	    (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
1709 	     IB_LINK_LAYER_ETHERNET)) {
1710 		DP_ERR(dev,
1711 		       "modify qp: invalid attribute mask=0x%x specified for\n"
1712 		       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1713 		       attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
1714 		       new_qp_state);
1715 		rc = -EINVAL;
1716 		goto err;
1717 	}
1718 
1719 	/* Translate the masks... */
1720 	if (attr_mask & IB_QP_STATE) {
1721 		SET_FIELD(qp_params.modify_flags,
1722 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1723 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1724 	}
1725 
1726 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1727 		qp_params.sqd_async = true;
1728 
1729 	if (attr_mask & IB_QP_PKEY_INDEX) {
1730 		SET_FIELD(qp_params.modify_flags,
1731 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1732 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1733 			rc = -EINVAL;
1734 			goto err;
1735 		}
1736 
1737 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1738 	}
1739 
1740 	if (attr_mask & IB_QP_QKEY)
1741 		qp->qkey = attr->qkey;
1742 
1743 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1744 		SET_FIELD(qp_params.modify_flags,
1745 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1746 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1747 						  IB_ACCESS_REMOTE_READ;
1748 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1749 						   IB_ACCESS_REMOTE_WRITE;
1750 		qp_params.incoming_atomic_en = attr->qp_access_flags &
1751 					       IB_ACCESS_REMOTE_ATOMIC;
1752 	}
1753 
1754 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1755 		if (attr_mask & IB_QP_PATH_MTU) {
1756 			if (attr->path_mtu < IB_MTU_256 ||
1757 			    attr->path_mtu > IB_MTU_4096) {
1758 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1759 				rc = -EINVAL;
1760 				goto err;
1761 			}
1762 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1763 				      ib_mtu_enum_to_int(iboe_get_mtu
1764 							 (dev->ndev->mtu)));
1765 		}
1766 
1767 		if (!qp->mtu) {
1768 			qp->mtu =
1769 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1770 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1771 		}
1772 
1773 		SET_FIELD(qp_params.modify_flags,
1774 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1775 
1776 		qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class;
1777 		qp_params.flow_label = attr->ah_attr.grh.flow_label;
1778 		qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit;
1779 
1780 		qp->sgid_idx = attr->ah_attr.grh.sgid_index;
1781 
1782 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1783 		if (rc) {
1784 			DP_ERR(dev,
1785 			       "modify qp: problems with GID index %d (rc=%d)\n",
1786 			       attr->ah_attr.grh.sgid_index, rc);
1787 			return rc;
1788 		}
1789 
1790 		rc = qedr_get_dmac(dev, &attr->ah_attr,
1791 				   qp_params.remote_mac_addr);
1792 		if (rc)
1793 			return rc;
1794 
1795 		qp_params.use_local_mac = true;
1796 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
1797 
1798 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
1799 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
1800 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
1801 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
1802 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
1803 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
1804 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
1805 			 qp_params.remote_mac_addr);
1806 
1807 		qp_params.mtu = qp->mtu;
1808 		qp_params.lb_indication = false;
1809 	}
1810 
1811 	if (!qp_params.mtu) {
1812 		/* Stay with current MTU */
1813 		if (qp->mtu)
1814 			qp_params.mtu = qp->mtu;
1815 		else
1816 			qp_params.mtu =
1817 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1818 	}
1819 
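	/* The IB timeout attribute below is an exponent encoding
	 * 4.096 usec * 2^timeout; the conversion computes 4096 * 2^timeout
	 * nanoseconds and divides down to the milliseconds the FW expects,
	 * e.g. timeout = 14 gives 4096 * 16384 ns = ~67 msec, while
	 * timeout = 0 is treated as an infinite timeout.
	 */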
1820 	if (attr_mask & IB_QP_TIMEOUT) {
1821 		SET_FIELD(qp_params.modify_flags,
1822 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
1823 
1824 		qp_params.ack_timeout = attr->timeout;
1825 		if (attr->timeout) {
1826 			u32 temp;
1827 
1828 			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
1829 			/* FW requires [msec] */
1830 			qp_params.ack_timeout = temp;
1831 		} else {
1832 			/* Infinite */
1833 			qp_params.ack_timeout = 0;
1834 		}
1835 	}
1836 	if (attr_mask & IB_QP_RETRY_CNT) {
1837 		SET_FIELD(qp_params.modify_flags,
1838 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
1839 		qp_params.retry_cnt = attr->retry_cnt;
1840 	}
1841 
1842 	if (attr_mask & IB_QP_RNR_RETRY) {
1843 		SET_FIELD(qp_params.modify_flags,
1844 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
1845 		qp_params.rnr_retry_cnt = attr->rnr_retry;
1846 	}
1847 
1848 	if (attr_mask & IB_QP_RQ_PSN) {
1849 		SET_FIELD(qp_params.modify_flags,
1850 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
1851 		qp_params.rq_psn = attr->rq_psn;
1852 		qp->rq_psn = attr->rq_psn;
1853 	}
1854 
1855 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1856 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
1857 			rc = -EINVAL;
1858 			DP_ERR(dev,
1859 			       "unsupported max_rd_atomic=%d, supported=%d\n",
1860 			       attr->max_rd_atomic,
1861 			       dev->attr.max_qp_req_rd_atomic_resc);
1862 			goto err;
1863 		}
1864 
1865 		SET_FIELD(qp_params.modify_flags,
1866 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
1867 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
1868 	}
1869 
1870 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1871 		SET_FIELD(qp_params.modify_flags,
1872 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
1873 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
1874 	}
1875 
1876 	if (attr_mask & IB_QP_SQ_PSN) {
1877 		SET_FIELD(qp_params.modify_flags,
1878 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
1879 		qp_params.sq_psn = attr->sq_psn;
1880 		qp->sq_psn = attr->sq_psn;
1881 	}
1882 
1883 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1884 		if (attr->max_dest_rd_atomic >
1885 		    dev->attr.max_qp_resp_rd_atomic_resc) {
1886 			DP_ERR(dev,
1887 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
1888 			       attr->max_dest_rd_atomic,
1889 			       dev->attr.max_qp_resp_rd_atomic_resc);
1890 
1891 			rc = -EINVAL;
1892 			goto err;
1893 		}
1894 
1895 		SET_FIELD(qp_params.modify_flags,
1896 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
1897 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
1898 	}
1899 
1900 	if (attr_mask & IB_QP_DEST_QPN) {
1901 		SET_FIELD(qp_params.modify_flags,
1902 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
1903 
1904 		qp_params.dest_qp = attr->dest_qp_num;
1905 		qp->dest_qp_num = attr->dest_qp_num;
1906 	}
1907 
1908 	if (qp->qp_type != IB_QPT_GSI)
1909 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
1910 					      qp->qed_qp, &qp_params);
1911 
1912 	if (attr_mask & IB_QP_STATE) {
1913 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
1914 			rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
1915 		qp->state = qp_params.new_state;
1916 	}
1917 
1918 err:
1919 	return rc;
1920 }
1921 
1922 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
1923 {
1924 	int ib_qp_acc_flags = 0;
1925 
1926 	if (params->incoming_rdma_write_en)
1927 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1928 	if (params->incoming_rdma_read_en)
1929 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
1930 	if (params->incoming_atomic_en)
1931 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
1932 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1933 	return ib_qp_acc_flags;
1934 }
1935 
1936 int qedr_query_qp(struct ib_qp *ibqp,
1937 		  struct ib_qp_attr *qp_attr,
1938 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
1939 {
1940 	struct qed_rdma_query_qp_out_params params;
1941 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1942 	struct qedr_dev *dev = qp->dev;
1943 	int rc = 0;
1944 
1945 	memset(&params, 0, sizeof(params));
1946 
1947 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
1948 	if (rc)
1949 		goto err;
1950 
1951 	memset(qp_attr, 0, sizeof(*qp_attr));
1952 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
1953 
1954 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
1955 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
1956 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
1957 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
1958 	qp_attr->rq_psn = params.rq_psn;
1959 	qp_attr->sq_psn = params.sq_psn;
1960 	qp_attr->dest_qp_num = params.dest_qp;
1961 
1962 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
1963 
1964 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
1965 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
1966 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
1967 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
1968 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
1969 	qp_init_attr->cap = qp_attr->cap;
1970 
1971 	memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], &params.dgid.bytes[0],
1972 	       sizeof(qp_attr->ah_attr.grh.dgid.raw));
1973 
1974 	qp_attr->ah_attr.grh.flow_label = params.flow_label;
1975 	qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
1976 	qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl;
1977 	qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos;
1978 
1979 	qp_attr->ah_attr.ah_flags = IB_AH_GRH;
1980 	qp_attr->ah_attr.port_num = 1;
1981 	qp_attr->ah_attr.sl = 0;
1982 	qp_attr->timeout = params.timeout;
1983 	qp_attr->rnr_retry = params.rnr_retry;
1984 	qp_attr->retry_cnt = params.retry_cnt;
1985 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
1986 	qp_attr->pkey_index = params.pkey_index;
1987 	qp_attr->port_num = 1;
1988 	qp_attr->ah_attr.src_path_bits = 0;
1989 	qp_attr->ah_attr.static_rate = 0;
1990 	qp_attr->alt_pkey_index = 0;
1991 	qp_attr->alt_port_num = 0;
1992 	qp_attr->alt_timeout = 0;
1993 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
1994 
1995 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
1996 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
1997 	qp_attr->max_rd_atomic = params.max_rd_atomic;
1998 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
1999 
2000 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2001 		 qp_attr->cap.max_inline_data);
2002 
2003 err:
2004 	return rc;
2005 }
2006 
2007 int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2008 {
2009 	int rc = 0;
2010 
2011 	if (qp->qp_type != IB_QPT_GSI) {
2012 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2013 		if (rc)
2014 			return rc;
2015 	}
2016 
2017 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2018 		qedr_cleanup_user(dev, qp);
2019 	else
2020 		qedr_cleanup_kernel(dev, qp);
2021 
2022 	return 0;
2023 }
2024 
2025 int qedr_destroy_qp(struct ib_qp *ibqp)
2026 {
2027 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2028 	struct qedr_dev *dev = qp->dev;
2029 	struct ib_qp_attr attr;
2030 	int attr_mask = 0;
2031 	int rc = 0;
2032 
2033 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2034 		 qp, qp->qp_type);
2035 
2036 	if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2037 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2038 	    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2039 
2040 		attr.qp_state = IB_QPS_ERR;
2041 		attr_mask |= IB_QP_STATE;
2042 
2043 		/* Change the QP state to ERROR */
2044 		qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2045 	}
2046 
2047 	if (qp->qp_type == IB_QPT_GSI)
2048 		qedr_destroy_gsi_qp(dev);
2049 
2050 	qedr_free_qp_resources(dev, qp);
2051 
2052 	kfree(qp);
2053 
2054 	return rc;
2055 }
2056 
2057 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
2058 			     struct ib_udata *udata)
2059 {
2060 	struct qedr_ah *ah;
2061 
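	/* GFP_ATOMIC: AH creation may be invoked from atomic context (e.g. by
	 * the connection manager), so we must not sleep here.
	 */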
2062 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2063 	if (!ah)
2064 		return ERR_PTR(-ENOMEM);
2065 
2066 	ah->attr = *attr;
2067 
2068 	return &ah->ibah;
2069 }
2070 
2071 int qedr_destroy_ah(struct ib_ah *ibah)
2072 {
2073 	struct qedr_ah *ah = get_qedr_ah(ibah);
2074 
2075 	kfree(ah);
2076 	return 0;
2077 }
2078 
2079 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2080 {
2081 	struct qedr_pbl *pbl, *tmp;
2082 
2083 	if (info->pbl_table)
2084 		list_add_tail(&info->pbl_table->list_entry,
2085 			      &info->free_pbl_list);
2086 
2087 	if (!list_empty(&info->inuse_pbl_list))
2088 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2089 
2090 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2091 		list_del(&pbl->list_entry);
2092 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2093 	}
2094 }
2095 
2096 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2097 			size_t page_list_len, bool two_layered)
2098 {
2099 	struct qedr_pbl *tmp;
2100 	int rc;
2101 
2102 	INIT_LIST_HEAD(&info->free_pbl_list);
2103 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2104 
2105 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2106 				  page_list_len, two_layered);
2107 	if (rc)
2108 		goto done;
2109 
2110 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2111 	if (IS_ERR(info->pbl_table)) {
2112 		rc = PTR_ERR(info->pbl_table);
2113 		goto done;
2114 	}
2115 
2116 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2117 		 &info->pbl_table->pa);
2118 
2119 	/* In the usual case we use 2 PBLs, so we add one to the free
2120 	 * list and allocate another one
2121 	 */
2122 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2123 	if (IS_ERR(tmp)) {
2124 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2125 		goto done;
2126 	}
2127 
2128 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2129 
2130 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2131 
2132 done:
2133 	if (rc)
2134 		free_mr_info(dev, info);
2135 
2136 	return rc;
2137 }
2138 
2139 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2140 			       u64 usr_addr, int acc, struct ib_udata *udata)
2141 {
2142 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2143 	struct qedr_mr *mr;
2144 	struct qedr_pd *pd;
2145 	int rc = -ENOMEM;
2146 
2147 	pd = get_qedr_pd(ibpd);
2148 	DP_DEBUG(dev, QEDR_MSG_MR,
2149 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2150 		 pd->pd_id, start, len, usr_addr, acc);
2151 
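	/* Per the IB spec, remote write access requires local write access
	 * to be granted as well.
	 */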
2152 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2153 		return ERR_PTR(-EINVAL);
2154 
2155 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2156 	if (!mr)
2157 		return ERR_PTR(rc);
2158 
2159 	mr->type = QEDR_MR_USER;
2160 
2161 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2162 	if (IS_ERR(mr->umem)) {
2163 		rc = -EFAULT;
2164 		goto err0;
2165 	}
2166 
2167 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2168 	if (rc)
2169 		goto err1;
2170 
2171 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2172 			   &mr->info.pbl_info);
2173 
2174 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2175 	if (rc) {
2176 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2177 		goto err1;
2178 	}
2179 
2180 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2181 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2182 	mr->hw_mr.key = 0;
2183 	mr->hw_mr.pd = pd->pd_id;
2184 	mr->hw_mr.local_read = 1;
2185 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2186 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2187 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2188 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2189 	mr->hw_mr.mw_bind = false;
2190 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2191 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2192 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2193 	mr->hw_mr.page_size_log = ilog2(mr->umem->page_size);
2194 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2195 	mr->hw_mr.length = len;
2196 	mr->hw_mr.vaddr = usr_addr;
2197 	mr->hw_mr.zbva = false;
2198 	mr->hw_mr.phy_mr = false;
2199 	mr->hw_mr.dma_mr = false;
2200 
2201 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2202 	if (rc) {
2203 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2204 		goto err2;
2205 	}
2206 
2207 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2208 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2209 	    mr->hw_mr.remote_atomic)
2210 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2211 
2212 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2213 		 mr->ibmr.lkey);
2214 	return &mr->ibmr;
2215 
2216 err2:
2217 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2218 err1:
2219 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2220 err0:
2221 	kfree(mr);
2222 	return ERR_PTR(rc);
2223 }
2224 
2225 int qedr_dereg_mr(struct ib_mr *ib_mr)
2226 {
2227 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2228 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2229 	int rc = 0;
2230 
2231 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2232 	if (rc)
2233 		return rc;
2234 
2235 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2236 
2237 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2238 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2239 
2240 	/* It could be user-registered memory. */
2241 	if (mr->umem)
2242 		ib_umem_release(mr->umem);
2243 
2244 	kfree(mr);
2245 
2246 	return rc;
2247 }
2248 
2249 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2250 				       int max_page_list_len)
2251 {
2252 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2253 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2254 	struct qedr_mr *mr;
2255 	int rc = -ENOMEM;
2256 
2257 	DP_DEBUG(dev, QEDR_MSG_MR,
2258 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2259 		 max_page_list_len);
2260 
2261 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2262 	if (!mr)
2263 		return ERR_PTR(rc);
2264 
2265 	mr->dev = dev;
2266 	mr->type = QEDR_MR_FRMR;
2267 
2268 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2269 	if (rc)
2270 		goto err0;
2271 
2272 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2273 	if (rc) {
2274 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2275 		goto err0;
2276 	}
2277 
2278 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2279 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2280 	mr->hw_mr.key = 0;
2281 	mr->hw_mr.pd = pd->pd_id;
2282 	mr->hw_mr.local_read = 1;
2283 	mr->hw_mr.local_write = 0;
2284 	mr->hw_mr.remote_read = 0;
2285 	mr->hw_mr.remote_write = 0;
2286 	mr->hw_mr.remote_atomic = 0;
2287 	mr->hw_mr.mw_bind = false;
2288 	mr->hw_mr.pbl_ptr = 0;
2289 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2290 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2291 	mr->hw_mr.fbo = 0;
2292 	mr->hw_mr.length = 0;
2293 	mr->hw_mr.vaddr = 0;
2294 	mr->hw_mr.zbva = false;
2295 	mr->hw_mr.phy_mr = true;
2296 	mr->hw_mr.dma_mr = false;
2297 
2298 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2299 	if (rc) {
2300 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2301 		goto err1;
2302 	}
2303 
2304 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2305 	mr->ibmr.rkey = mr->ibmr.lkey;
2306 
2307 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2308 	return mr;
2309 
2310 err1:
2311 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2312 err0:
2313 	kfree(mr);
2314 	return ERR_PTR(rc);
2315 }
2316 
2317 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2318 			    enum ib_mr_type mr_type, u32 max_num_sg)
2319 {
2320 	struct qedr_dev *dev;
2321 	struct qedr_mr *mr;
2322 
2323 	if (mr_type != IB_MR_TYPE_MEM_REG)
2324 		return ERR_PTR(-EINVAL);
2325 
2326 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2327 
2328 	if (IS_ERR(mr))
2329 		return ERR_PTR(-EINVAL);
2330 
2331 	dev = mr->dev;
2332 
2333 	return &mr->ibmr;
2334 }
2335 
2336 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2337 {
2338 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2339 	struct qedr_pbl *pbl_table;
2340 	struct regpair *pbe;
2341 	u32 pbes_in_page;
2342 
2343 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2344 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2345 		return -ENOMEM;
2346 	}
2347 
2348 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2349 		 mr->npages, addr);
2350 
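	/* Each PBL page holds pbl_size / sizeof(u64) page-buffer entries
	 * (PBEs); locate the PBL page this page index falls into and the
	 * PBE slot within it.
	 */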
2351 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2352 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2353 	pbe = (struct regpair *)pbl_table->va;
2354 	pbe +=  mr->npages % pbes_in_page;
2355 	pbe->lo = cpu_to_le32((u32)addr);
2356 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2357 
2358 	mr->npages++;
2359 
2360 	return 0;
2361 }
2362 
2363 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2364 {
2365 	int work = info->completed - info->completed_handled - 1;
2366 
2367 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2368 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2369 		struct qedr_pbl *pbl;
2370 
2371 		/* Free all the page lists that can be freed (all the ones
2372 		 * that were invalidated), under the assumption that if an
2373 		 * FMR completed successfully, then any invalidate operation
2374 		 * issued before it has completed as well
2375 		 */
2376 		pbl = list_first_entry(&info->inuse_pbl_list,
2377 				       struct qedr_pbl, list_entry);
2378 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2379 		info->completed_handled++;
2380 	}
2381 }
2382 
2383 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2384 		   int sg_nents, unsigned int *sg_offset)
2385 {
2386 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2387 
2388 	mr->npages = 0;
2389 
2390 	handle_completed_mrs(mr->dev, &mr->info);
2391 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2392 }
2393 
2394 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2395 {
2396 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2397 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2398 	struct qedr_mr *mr;
2399 	int rc;
2400 
2401 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2402 	if (!mr)
2403 		return ERR_PTR(-ENOMEM);
2404 
2405 	mr->type = QEDR_MR_DMA;
2406 
2407 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2408 	if (rc) {
2409 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2410 		goto err1;
2411 	}
2412 
2413 	/* index only, 18 bit long, lkey = itid << 8 | key */
2414 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2415 	mr->hw_mr.pd = pd->pd_id;
2416 	mr->hw_mr.local_read = 1;
2417 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2418 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2419 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2420 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2421 	mr->hw_mr.dma_mr = true;
2422 
2423 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2424 	if (rc) {
2425 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2426 		goto err2;
2427 	}
2428 
2429 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2430 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2431 	    mr->hw_mr.remote_atomic)
2432 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2433 
2434 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2435 	return &mr->ibmr;
2436 
2437 err2:
2438 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2439 err1:
2440 	kfree(mr);
2441 	return ERR_PTR(rc);
2442 }
2443 
2444 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2445 {
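	/* Classic ring check: the queue is full when advancing prod by one
	 * would land on cons, i.e. one slot is always left unused.
	 */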
2446 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2447 }
2448 
2449 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2450 {
2451 	int i, len = 0;
2452 
2453 	for (i = 0; i < num_sge; i++)
2454 		len += sg_list[i].length;
2455 
2456 	return len;
2457 }
2458 
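/* Note: cpu_to_be64(cpu_to_le64(x)) amounts to an unconditional 64-bit byte
 * swap of each inline data quadword, regardless of host endianness.
 */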
2459 static void swap_wqe_data64(u64 *p)
2460 {
2461 	int i;
2462 
2463 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2464 		*p = cpu_to_be64(cpu_to_le64(*p));
2465 }
2466 
2467 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2468 				       struct qedr_qp *qp, u8 *wqe_size,
2469 				       struct ib_send_wr *wr,
2470 				       struct ib_send_wr **bad_wr, u8 *bits,
2471 				       u8 bit)
2472 {
2473 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2474 	char *seg_prt, *wqe;
2475 	int i, seg_siz;
2476 
2477 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2478 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2479 		*bad_wr = wr;
2480 		return 0;
2481 	}
2482 
2483 	if (!data_size)
2484 		return data_size;
2485 
2486 	*bits |= bit;
2487 
2488 	seg_prt = NULL;
2489 	wqe = NULL;
2490 	seg_siz = 0;
2491 
2492 	/* Copy data inline */
2493 	for (i = 0; i < wr->num_sge; i++) {
2494 		u32 len = wr->sg_list[i].length;
2495 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2496 
2497 		while (len > 0) {
2498 			u32 cur;
2499 
2500 			/* New segment required */
2501 			if (!seg_siz) {
2502 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2503 				seg_prt = wqe;
2504 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2505 				(*wqe_size)++;
2506 			}
2507 
2508 			/* Calculate currently allowed length */
2509 			cur = min_t(u32, len, seg_siz);
2510 			memcpy(seg_prt, src, cur);
2511 
2512 			/* Update segment variables */
2513 			seg_prt += cur;
2514 			seg_siz -= cur;
2515 
2516 			/* Update sge variables */
2517 			src += cur;
2518 			len -= cur;
2519 
2520 			/* Swap fully-completed segments */
2521 			if (!seg_siz)
2522 				swap_wqe_data64((u64 *)wqe);
2523 		}
2524 	}
2525 
2526 	/* swap the last segment if it was not fully completed */
2527 	if (seg_siz)
2528 		swap_wqe_data64((u64 *)wqe);
2529 
2530 	return data_size;
2531 }
2532 
2533 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
2534 	do {							\
2535 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2536 		(sge)->length = cpu_to_le32(vlength);		\
2537 		(sge)->flags = cpu_to_le32(vflags);		\
2538 	} while (0)
2539 
2540 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
2541 	do {							\
2542 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
2543 		(hdr)->num_sges = num_sge;			\
2544 	} while (0)
2545 
2546 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
2547 	do {							\
2548 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2549 		(sge)->length = cpu_to_le32(vlength);		\
2550 		(sge)->l_key = cpu_to_le32(vlkey);		\
2551 	} while (0)
2552 
2553 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2554 				struct ib_send_wr *wr)
2555 {
2556 	u32 data_size = 0;
2557 	int i;
2558 
2559 	for (i = 0; i < wr->num_sge; i++) {
2560 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2561 
2562 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2563 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2564 		sge->length = cpu_to_le32(wr->sg_list[i].length);
2565 		data_size += wr->sg_list[i].length;
2566 	}
2567 
2568 	if (wqe_size)
2569 		*wqe_size += wr->num_sge;
2570 
2571 	return data_size;
2572 }
2573 
2574 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2575 				     struct qedr_qp *qp,
2576 				     struct rdma_sq_rdma_wqe_1st *rwqe,
2577 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2578 				     struct ib_send_wr *wr,
2579 				     struct ib_send_wr **bad_wr)
2580 {
2581 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2582 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2583 
2584 	if (wr->send_flags & IB_SEND_INLINE &&
2585 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2586 	     wr->opcode == IB_WR_RDMA_WRITE)) {
2587 		u8 flags = 0;
2588 
2589 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2590 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2591 						   bad_wr, &rwqe->flags, flags);
2592 	}
2593 
2594 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2595 }
2596 
2597 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2598 				     struct qedr_qp *qp,
2599 				     struct rdma_sq_send_wqe_1st *swqe,
2600 				     struct rdma_sq_send_wqe_2st *swqe2,
2601 				     struct ib_send_wr *wr,
2602 				     struct ib_send_wr **bad_wr)
2603 {
2604 	memset(swqe2, 0, sizeof(*swqe2));
2605 	if (wr->send_flags & IB_SEND_INLINE) {
2606 		u8 flags = 0;
2607 
2608 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2609 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2610 						   bad_wr, &swqe->flags, flags);
2611 	}
2612 
2613 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2614 }
2615 
2616 static int qedr_prepare_reg(struct qedr_qp *qp,
2617 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2618 			    struct ib_reg_wr *wr)
2619 {
2620 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2621 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2622 
2623 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2624 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2625 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2626 	fwqe1->l_key = wr->key;
2627 
2628 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2629 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2630 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2631 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2632 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2633 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2634 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2635 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2636 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2637 	fwqe2->fmr_ctrl = 0;
2638 
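	/* PAGE_SIZE_LOG is apparently expressed relative to a 4K page,
	 * hence the ilog2() - 12 below.
	 */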
2639 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2640 		   ilog2(mr->ibmr.page_size) - 12);
2641 
2642 	fwqe2->length_hi = 0;
2643 	fwqe2->length_lo = mr->ibmr.length;
2644 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2645 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2646 
2647 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2648 
2649 	return 0;
2650 }
2651 
2652 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2653 {
2654 	switch (opcode) {
2655 	case IB_WR_RDMA_WRITE:
2656 	case IB_WR_RDMA_WRITE_WITH_IMM:
2657 		return IB_WC_RDMA_WRITE;
2658 	case IB_WR_SEND_WITH_IMM:
2659 	case IB_WR_SEND:
2660 	case IB_WR_SEND_WITH_INV:
2661 		return IB_WC_SEND;
2662 	case IB_WR_RDMA_READ:
2663 		return IB_WC_RDMA_READ;
2664 	case IB_WR_ATOMIC_CMP_AND_SWP:
2665 		return IB_WC_COMP_SWAP;
2666 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2667 		return IB_WC_FETCH_ADD;
2668 	case IB_WR_REG_MR:
2669 		return IB_WC_REG_MR;
2670 	case IB_WR_LOCAL_INV:
2671 		return IB_WC_LOCAL_INV;
2672 	default:
2673 		return IB_WC_SEND;
2674 	}
2675 }
2676 
2677 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2678 {
2679 	int wq_is_full, err_wr, pbl_is_full;
2680 	struct qedr_dev *dev = qp->dev;
2681 
2682 	/* prevent SQ overflow and/or processing of a bad WR */
2683 	err_wr = wr->num_sge > qp->sq.max_sges;
2684 	wq_is_full = qedr_wq_is_full(&qp->sq);
2685 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2686 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2687 	if (wq_is_full || err_wr || pbl_is_full) {
2688 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2689 			DP_ERR(dev,
2690 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2691 			       qp);
2692 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2693 		}
2694 
2695 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2696 			DP_ERR(dev,
2697 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2698 			       qp);
2699 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2700 		}
2701 
2702 		if (pbl_is_full &&
2703 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2704 			DP_ERR(dev,
2705 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2706 			       qp);
2707 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2708 		}
2709 		return false;
2710 	}
2711 	return true;
2712 }
2713 
2714 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2715 		     struct ib_send_wr **bad_wr)
2716 {
2717 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2718 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2719 	struct rdma_sq_atomic_wqe_1st *awqe1;
2720 	struct rdma_sq_atomic_wqe_2nd *awqe2;
2721 	struct rdma_sq_atomic_wqe_3rd *awqe3;
2722 	struct rdma_sq_send_wqe_2st *swqe2;
2723 	struct rdma_sq_local_inv_wqe *iwqe;
2724 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2725 	struct rdma_sq_send_wqe_1st *swqe;
2726 	struct rdma_sq_rdma_wqe_1st *rwqe;
2727 	struct rdma_sq_fmr_wqe_1st *fwqe1;
2728 	struct rdma_sq_common_wqe *wqe;
2729 	u32 length;
2730 	int rc = 0;
2731 	bool comp;
2732 
2733 	if (!qedr_can_post_send(qp, wr)) {
2734 		*bad_wr = wr;
2735 		return -ENOMEM;
2736 	}
2737 
2738 	wqe = qed_chain_produce(&qp->sq.pbl);
2739 	qp->wqe_wr_id[qp->sq.prod].signaled =
2740 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2741 
2742 	wqe->flags = 0;
2743 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2744 		   !!(wr->send_flags & IB_SEND_SOLICITED));
2745 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2746 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2747 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2748 		   !!(wr->send_flags & IB_SEND_FENCE));
2749 	wqe->prev_wqe_size = qp->prev_wqe_size;
2750 
2751 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2752 
2753 	switch (wr->opcode) {
2754 	case IB_WR_SEND_WITH_IMM:
2755 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
2756 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2757 		swqe->wqe_size = 2;
2758 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2759 
2760 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
2761 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2762 						   wr, bad_wr);
2763 		swqe->length = cpu_to_le32(length);
2764 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2765 		qp->prev_wqe_size = swqe->wqe_size;
2766 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2767 		break;
2768 	case IB_WR_SEND:
2769 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
2770 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2771 
2772 		swqe->wqe_size = 2;
2773 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2774 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2775 						   wr, bad_wr);
2776 		swqe->length = cpu_to_le32(length);
2777 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2778 		qp->prev_wqe_size = swqe->wqe_size;
2779 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2780 		break;
2781 	case IB_WR_SEND_WITH_INV:
2782 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
2783 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2784 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2785 		swqe->wqe_size = 2;
2786 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
2787 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2788 						   wr, bad_wr);
2789 		swqe->length = cpu_to_le32(length);
2790 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2791 		qp->prev_wqe_size = swqe->wqe_size;
2792 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2793 		break;
2794 
2795 	case IB_WR_RDMA_WRITE_WITH_IMM:
2796 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
2797 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2798 
2799 		rwqe->wqe_size = 2;
2800 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
2801 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2802 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2803 						   wr, bad_wr);
2804 		rwqe->length = cpu_to_le32(length);
2805 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2806 		qp->prev_wqe_size = rwqe->wqe_size;
2807 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2808 		break;
2809 	case IB_WR_RDMA_WRITE:
2810 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
2811 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2812 
2813 		rwqe->wqe_size = 2;
2814 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2815 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2816 						   wr, bad_wr);
2817 		rwqe->length = cpu_to_le32(length);
2818 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2819 		qp->prev_wqe_size = rwqe->wqe_size;
2820 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2821 		break;
2822 	case IB_WR_RDMA_READ_WITH_INV:
2823 		DP_ERR(dev,
2824 		       "RDMA READ WITH INVALIDATE not supported\n");
2825 		*bad_wr = wr;
2826 		rc = -EINVAL;
2827 		break;
2828 
2829 	case IB_WR_RDMA_READ:
2830 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
2831 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2832 
2833 		rwqe->wqe_size = 2;
2834 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2835 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2836 						   wr, bad_wr);
2837 		rwqe->length = cpu_to_le32(length);
2838 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2839 		qp->prev_wqe_size = rwqe->wqe_size;
2840 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2841 		break;
2842 
2843 	case IB_WR_ATOMIC_CMP_AND_SWP:
2844 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2845 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
2846 		awqe1->wqe_size = 4;
2847 
2848 		awqe2 = qed_chain_produce(&qp->sq.pbl);
2849 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
2850 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
2851 
2852 		awqe3 = qed_chain_produce(&qp->sq.pbl);
2853 
2854 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2855 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
2856 			DMA_REGPAIR_LE(awqe3->swap_data,
2857 				       atomic_wr(wr)->compare_add);
2858 		} else {
2859 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
2860 			DMA_REGPAIR_LE(awqe3->swap_data,
2861 				       atomic_wr(wr)->swap);
2862 			DMA_REGPAIR_LE(awqe3->cmp_data,
2863 				       atomic_wr(wr)->compare_add);
2864 		}
2865 
2866 		qedr_prepare_sq_sges(qp, NULL, wr);
2867 
2868 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
2869 		qp->prev_wqe_size = awqe1->wqe_size;
2870 		break;
2871 
2872 	case IB_WR_LOCAL_INV:
2873 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
2874 		iwqe->wqe_size = 1;
2875 
2876 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
2877 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
2878 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
2879 		qp->prev_wqe_size = iwqe->wqe_size;
2880 		break;
2881 	case IB_WR_REG_MR:
2882 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
2883 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
2884 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
2885 		fwqe1->wqe_size = 2;
2886 
2887 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
2888 		if (rc) {
2889 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
2890 			*bad_wr = wr;
2891 			break;
2892 		}
2893 
2894 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
2895 		qp->prev_wqe_size = fwqe1->wqe_size;
2896 		break;
2897 	default:
2898 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
2899 		rc = -EINVAL;
2900 		*bad_wr = wr;
2901 		break;
2902 	}
2903 
2904 	if (*bad_wr) {
2905 		u16 value;
2906 
2907 		/* Restore prod to its position before
2908 		 * this WR was processed
2909 		 */
2910 		value = le16_to_cpu(qp->sq.db_data.data.value);
2911 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
2912 
2913 		/* Restore prev_wqe_size */
2914 		qp->prev_wqe_size = wqe->prev_wqe_size;
2915 		rc = -EINVAL;
2916 		DP_ERR(dev, "POST SEND FAILED\n");
2917 	}
2918 
2919 	return rc;
2920 }
2921 
2922 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2923 		   struct ib_send_wr **bad_wr)
2924 {
2925 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2926 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2927 	unsigned long flags;
2928 	int rc = 0;
2929 
2930 	*bad_wr = NULL;
2931 
2932 	if (qp->qp_type == IB_QPT_GSI)
2933 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
2934 
2935 	spin_lock_irqsave(&qp->q_lock, flags);
2936 
2937 	if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
2938 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2939 	    (qp->state != QED_ROCE_QP_STATE_SQD)) {
2940 		spin_unlock_irqrestore(&qp->q_lock, flags);
2941 		*bad_wr = wr;
2942 		DP_DEBUG(dev, QEDR_MSG_CQ,
2943 			 "QP in wrong state! QP icid=0x%x state %d\n",
2944 			 qp->icid, qp->state);
2945 		return -EINVAL;
2946 	}
2947 
2948 	while (wr) {
2949 		rc = __qedr_post_send(ibqp, wr, bad_wr);
2950 		if (rc)
2951 			break;
2952 
2953 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
2954 
2955 		qedr_inc_sw_prod(&qp->sq);
2956 
2957 		qp->sq.db_data.data.value++;
2958 
2959 		wr = wr->next;
2960 	}
2961 
2962 	/* Trigger doorbell
2963 	 * If there was a failure in the first WR then it will be triggered in
2964 	 * vain. However, this is not harmful (as long as the producer value is
2965 	 * unchanged). For performance reasons we avoid checking for this
2966 	 * redundant doorbell.
2967 	 */
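	/* wmb() orders the WQE stores in host memory before the doorbell
	 * write below; mmiowb() afterwards keeps the MMIO write from being
	 * reordered past the spin_unlock on architectures that need it.
	 */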
2968 	wmb();
2969 	writel(qp->sq.db_data.raw, qp->sq.db);
2970 
2971 	/* Make sure write sticks */
2972 	mmiowb();
2973 
2974 	spin_unlock_irqrestore(&qp->q_lock, flags);
2975 
2976 	return rc;
2977 }
2978 
2979 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2980 		   struct ib_recv_wr **bad_wr)
2981 {
2982 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2983 	struct qedr_dev *dev = qp->dev;
2984 	unsigned long flags;
2985 	int status = 0;
2986 
2987 	if (qp->qp_type == IB_QPT_GSI)
2988 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
2989 
2990 	spin_lock_irqsave(&qp->q_lock, flags);
2991 
2992 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
2993 		spin_unlock_irqrestore(&qp->q_lock, flags);
2994 		*bad_wr = wr;
2995 		return -EINVAL;
2996 	}
2997 
2998 	while (wr) {
2999 		int i;
3000 
3001 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3002 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3003 		    wr->num_sge > qp->rq.max_sges) {
3004 			DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
3005 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3006 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3007 			       qp->rq.max_sges);
3008 			status = -ENOMEM;
3009 			*bad_wr = wr;
3010 			break;
3011 		}
3012 		for (i = 0; i < wr->num_sge; i++) {
3013 			u32 flags = 0;
3014 			struct rdma_rq_sge *rqe =
3015 			    qed_chain_produce(&qp->rq.pbl);
3016 
3017 			/* First one must include the number
3018 			 * of SGEs in the list
3019 			 */
3020 			if (!i)
3021 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3022 					  wr->num_sge);
3023 
3024 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3025 				  wr->sg_list[i].lkey);
3026 
3027 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3028 				   wr->sg_list[i].length, flags);
3029 		}
3030 
3031 		/* Special case of no SGEs. The FW requires between 1-4 SGEs,
3032 		 * so post one SGE with length zero. This is needed because an
3033 		 * RDMA write with immediate consumes an RQ entry.
3034 		 */
3035 		if (!wr->num_sge) {
3036 			u32 flags = 0;
3037 			struct rdma_rq_sge *rqe =
3038 			    qed_chain_produce(&qp->rq.pbl);
3039 
3040 			/* First one must include the number
3041 			 * of SGEs in the list
3042 			 */
3043 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3044 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3045 
3046 			RQ_SGE_SET(rqe, 0, 0, flags);
3047 			i = 1;
3048 		}
3049 
3050 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3051 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3052 
3053 		qedr_inc_sw_prod(&qp->rq);
3054 
3055 		/* Flush all the writes before signalling doorbell */
3056 		wmb();
3057 
3058 		qp->rq.db_data.data.value++;
3059 
3060 		writel(qp->rq.db_data.raw, qp->rq.db);
3061 
3062 		/* Make sure write sticks */
3063 		mmiowb();
3064 
3065 		wr = wr->next;
3066 	}
3067 
3068 	spin_unlock_irqrestore(&qp->q_lock, flags);
3069 
3070 	return status;
3071 }
3072 
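/* A CQE is new (needs processing) when its toggle bit matches the CQ's
 * current pbl_toggle, which is flipped whenever the CQE chain wraps around.
 */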
3073 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3074 {
3075 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3076 
3077 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3078 		cq->pbl_toggle;
3079 }
3080 
3081 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3082 {
3083 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3084 	struct qedr_qp *qp;
3085 
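	/* The qp_handle in the CQE carries the two 32-bit halves of the
	 * driver's QP pointer (set up at QP creation); HILO_GEN reassembles
	 * them into the original pointer.
	 */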
3086 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3087 						   resp_cqe->qp_handle.lo,
3088 						   u64);
3089 	return qp;
3090 }
3091 
3092 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3093 {
3094 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3095 
3096 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3097 }
3098 
3099 /* Return latest CQE (needs processing) */
3100 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3101 {
3102 	return cq->latest_cqe;
3103 }
3104 
3105 /* For an FMR we need to increment the completed counter used by the FMR
3106  * algorithm that determines whether a PBL can be freed or not.
3107  * We need to do this whether the work request was signaled or not. For
3108  * this purpose we call this function from the condition that checks if a WR
3109  * should be skipped, to make sure we don't miss it (possibly this FMR
3110  * operation was not signaled).
3111  */
3112 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3113 {
3114 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3115 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3116 }
3117 
3118 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3119 		       struct qedr_cq *cq, int num_entries,
3120 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3121 		       int force)
3122 {
3123 	u16 cnt = 0;
3124 
3125 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3126 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3127 			qedr_chk_if_fmr(qp);
3128 			/* skip WC */
3129 			goto next_cqe;
3130 		}
3131 
3132 		/* fill WC */
3133 		wc->status = status;
3134 		wc->vendor_err = 0;
3135 		wc->wc_flags = 0;
3136 		wc->src_qp = qp->id;
3137 		wc->qp = &qp->ibqp;
3138 
3139 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3140 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3141 
3142 		switch (wc->opcode) {
3143 		case IB_WC_RDMA_WRITE:
3144 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3145 			break;
3146 		case IB_WC_COMP_SWAP:
3147 		case IB_WC_FETCH_ADD:
3148 			wc->byte_len = 8;
3149 			break;
3150 		case IB_WC_REG_MR:
3151 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3152 			break;
3153 		default:
3154 			break;
3155 		}
3156 
3157 		num_entries--;
3158 		wc++;
3159 		cnt++;
3160 next_cqe:
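		/* Whether or not a WC was generated, release the SQ chain
		 * elements this WQE occupied and advance the SW consumer.
		 */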
3161 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3162 			qed_chain_consume(&qp->sq.pbl);
3163 		qedr_inc_sw_cons(&qp->sq);
3164 	}
3165 
3166 	return cnt;
3167 }
3168 
3169 static int qedr_poll_cq_req(struct qedr_dev *dev,
3170 			    struct qedr_qp *qp, struct qedr_cq *cq,
3171 			    int num_entries, struct ib_wc *wc,
3172 			    struct rdma_cqe_requester *req)
3173 {
3174 	int cnt = 0;
3175 
3176 	switch (req->status) {
3177 	case RDMA_CQE_REQ_STS_OK:
3178 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3179 				  IB_WC_SUCCESS, 0);
3180 		break;
3181 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3182 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3183 			DP_ERR(dev,
3184 			       "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3185 			       cq->icid, qp->icid);
3186 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3187 				  IB_WC_WR_FLUSH_ERR, 1);
3188 		break;
3189 	default:
3190 		/* process all WQEs before the consumer */
3191 		qp->state = QED_ROCE_QP_STATE_ERR;
3192 		cnt = process_req(dev, qp, cq, num_entries, wc,
3193 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3194 		wc += cnt;
3195 		/* if we have extra WC fill it with actual error info */
3196 		if (cnt < num_entries) {
3197 			enum ib_wc_status wc_status;
3198 
3199 			switch (req->status) {
3200 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3201 				DP_ERR(dev,
3202 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3203 				       cq->icid, qp->icid);
3204 				wc_status = IB_WC_BAD_RESP_ERR;
3205 				break;
3206 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3207 				DP_ERR(dev,
3208 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3209 				       cq->icid, qp->icid);
3210 				wc_status = IB_WC_LOC_LEN_ERR;
3211 				break;
3212 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3213 				DP_ERR(dev,
3214 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3215 				       cq->icid, qp->icid);
3216 				wc_status = IB_WC_LOC_QP_OP_ERR;
3217 				break;
3218 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3219 				DP_ERR(dev,
3220 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3221 				       cq->icid, qp->icid);
3222 				wc_status = IB_WC_LOC_PROT_ERR;
3223 				break;
3224 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3225 				DP_ERR(dev,
3226 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3227 				       cq->icid, qp->icid);
3228 				wc_status = IB_WC_MW_BIND_ERR;
3229 				break;
3230 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3231 				DP_ERR(dev,
3232 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3233 				       cq->icid, qp->icid);
3234 				wc_status = IB_WC_REM_INV_REQ_ERR;
3235 				break;
3236 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3237 				DP_ERR(dev,
3238 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3239 				       cq->icid, qp->icid);
3240 				wc_status = IB_WC_REM_ACCESS_ERR;
3241 				break;
3242 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3243 				DP_ERR(dev,
3244 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3245 				       cq->icid, qp->icid);
3246 				wc_status = IB_WC_REM_OP_ERR;
3247 				break;
3248 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3249 				DP_ERR(dev,
3250 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3251 				       cq->icid, qp->icid);
3252 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3253 				break;
3254 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3255 				DP_ERR(dev,
3256 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3257 				       cq->icid, qp->icid);
3258 				wc_status = IB_WC_RETRY_EXC_ERR;
3259 				break;
3260 			default:
3261 				DP_ERR(dev,
3262 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3263 				       cq->icid, qp->icid);
3264 				wc_status = IB_WC_GENERAL_ERR;
3265 			}
3266 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3267 					   wc_status, 1);
3268 		}
3269 	}
3270 
3271 	return cnt;
3272 }
3273 
3274 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3275 			       struct qedr_cq *cq, struct ib_wc *wc,
3276 			       struct rdma_cqe_responder *resp, u64 wr_id)
3277 {
3278 	enum ib_wc_status wc_status = IB_WC_SUCCESS;
3279 	u8 flags;
3280 
3281 	wc->opcode = IB_WC_RECV;
3282 	wc->wc_flags = 0;
3283 
3284 	switch (resp->status) {
3285 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3286 		wc_status = IB_WC_LOC_ACCESS_ERR;
3287 		break;
3288 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3289 		wc_status = IB_WC_LOC_LEN_ERR;
3290 		break;
3291 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3292 		wc_status = IB_WC_LOC_QP_OP_ERR;
3293 		break;
3294 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3295 		wc_status = IB_WC_LOC_PROT_ERR;
3296 		break;
3297 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3298 		wc_status = IB_WC_MW_BIND_ERR;
3299 		break;
3300 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3301 		wc_status = IB_WC_REM_INV_RD_REQ_ERR;
3302 		break;
3303 	case RDMA_CQE_RESP_STS_OK:
3304 		wc_status = IB_WC_SUCCESS;
3305 		wc->byte_len = le32_to_cpu(resp->length);
3306 
3307 		flags = resp->flags & QEDR_RESP_RDMA_IMM;
3308 
3309 		if (flags == QEDR_RESP_RDMA_IMM)
3310 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3311 
3312 		if (flags == QEDR_RESP_RDMA_IMM || flags == QEDR_RESP_IMM) {
3313 			wc->ex.imm_data =
3314 				le32_to_cpu(resp->imm_data_or_inv_r_Key);
3315 			wc->wc_flags |= IB_WC_WITH_IMM;
3316 		}
3317 		break;
3318 	default:
3319 		wc->status = IB_WC_GENERAL_ERR;
3320 		DP_ERR(dev, "Invalid CQE status detected\n");
3321 	}
3322 
3323 	/* fill WC */
3324 	wc->status = wc_status;
3325 	wc->vendor_err = 0;
3326 	wc->src_qp = qp->id;
3327 	wc->qp = &qp->ibqp;
3328 	wc->wr_id = wr_id;
3329 }
3330 
3331 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3332 			    struct qedr_cq *cq, struct ib_wc *wc,
3333 			    struct rdma_cqe_responder *resp)
3334 {
3335 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3336 
3337 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3338 
3339 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3340 		qed_chain_consume(&qp->rq.pbl);
3341 	qedr_inc_sw_cons(&qp->rq);
3342 
3343 	return 1;
3344 }
3345 
3346 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3347 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3348 {
3349 	u16 cnt = 0;
3350 
3351 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3352 		/* fill WC */
3353 		wc->status = IB_WC_WR_FLUSH_ERR;
3354 		wc->vendor_err = 0;
3355 		wc->wc_flags = 0;
3356 		wc->src_qp = qp->id;
3357 		wc->byte_len = 0;
3358 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3359 		wc->qp = &qp->ibqp;
3360 		num_entries--;
3361 		wc++;
3362 		cnt++;
3363 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3364 			qed_chain_consume(&qp->rq.pbl);
3365 		qedr_inc_sw_cons(&qp->rq);
3366 	}
3367 
3368 	return cnt;
3369 }
3370 
3371 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3372 				 struct rdma_cqe_responder *resp, int *update)
3373 {
3374 	if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
3375 		consume_cqe(cq);
3376 		*update |= 1;
3377 	}
3378 }
3379 
3380 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3381 			     struct qedr_cq *cq, int num_entries,
3382 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
3383 			     int *update)
3384 {
3385 	int cnt;
3386 
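	/* A single flush CQE stands for all outstanding RQEs up to rq_cons,
	 * so it is only consumed once the SW consumer has caught up with it.
	 */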
3387 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3388 		cnt = process_resp_flush(qp, cq, num_entries, wc,
3389 					 resp->rq_cons);
3390 		try_consume_resp_cqe(cq, qp, resp, update);
3391 	} else {
3392 		cnt = process_resp_one(dev, qp, cq, wc, resp);
3393 		consume_cqe(cq);
3394 		*update |= 1;
3395 	}
3396 
3397 	return cnt;
3398 }
3399 
3400 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3401 				struct rdma_cqe_requester *req, int *update)
3402 {
3403 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3404 		consume_cqe(cq);
3405 		*update |= 1;
3406 	}
3407 }
3408 
3409 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3410 {
3411 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3412 	struct qedr_cq *cq = get_qedr_cq(ibcq);
3413 	union rdma_cqe *cqe = cq->latest_cqe;
3414 	u32 old_cons, new_cons;
3415 	unsigned long flags;
3416 	int update = 0;
3417 	int done = 0;
3418 
3419 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3420 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3421 
3422 	spin_lock_irqsave(&cq->cq_lock, flags);
3423 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3424 	while (num_entries && is_valid_cqe(cq, cqe)) {
3425 		struct qedr_qp *qp;
3426 		int cnt = 0;
3427 
3428 		/* prevent speculative reads of any field of CQE */
3429 		rmb();
3430 
3431 		qp = cqe_get_qp(cqe);
3432 		if (!qp) {
3433 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3434 			break;
3435 		}
3436 
3437 		wc->qp = &qp->ibqp;
3438 
3439 		switch (cqe_get_type(cqe)) {
3440 		case RDMA_CQE_TYPE_REQUESTER:
3441 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3442 					       &cqe->req);
3443 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
3444 			break;
3445 		case RDMA_CQE_TYPE_RESPONDER_RQ:
3446 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3447 						&cqe->resp, &update);
3448 			break;
3449 		case RDMA_CQE_TYPE_INVALID:
3450 		default:
3451 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
3452 			       cqe_get_type(cqe));
3453 		}
3454 		num_entries -= cnt;
3455 		wc += cnt;
3456 		done += cnt;
3457 
3458 		cqe = get_cqe(cq);
3459 	}
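	/* Account for how many chain elements this poll consumed so that the
	 * doorbell below reports an up-to-date consumer index to HW.
	 */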
3460 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3461 
3462 	cq->cq_cons += new_cons - old_cons;
3463 
3464 	if (update)
3465 		/* The doorbell notifies HW about the latest VALID entry,
3466 		 * but the chain already points to the next INVALID one
3467 		 */
3468 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3469 
3470 	spin_unlock_irqrestore(&cq->cq_lock, flags);
3471 	return done;
3472 }
3473 
3474 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3475 		     u8 port_num,
3476 		     const struct ib_wc *in_wc,
3477 		     const struct ib_grh *in_grh,
3478 		     const struct ib_mad_hdr *mad_hdr,
3479 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
3480 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
3481 {
3482 	struct qedr_dev *dev = get_qedr_dev(ibdev);
3483 
3484 	DP_DEBUG(dev, QEDR_MSG_GSI,
3485 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3486 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3487 		 mad_hdr->class_specific, mad_hdr->class_version,
3488 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3489 	return IB_MAD_RESULT_SUCCESS;
3490 }
3491 
3492 int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
3493 			struct ib_port_immutable *immutable)
3494 {
3495 	struct ib_port_attr attr;
3496 	int err;
3497 
3498 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
3499 				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
3500 
3501 	err = ib_query_port(ibdev, port_num, &attr);
3502 	if (err)
3503 		return err;
3504 
3505 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
3506 	immutable->gid_tbl_len = attr.gid_tbl_len;
3507 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
3508 
3509 	return 0;
3510 }
3511