xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision a6ca5ac746d104019e76c29e69c2a1fc6dd2b29f)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_cm.h"
53 
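/* Convert a DQ PWM doorbell offset constant into a byte offset within the
 * doorbell BAR (and within the doorbell area mapped to user space).
 */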
54 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 
56 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
57 {
58 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
59 		return -EINVAL;
60 
61 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
62 	return 0;
63 }
64 
65 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
66 		   union ib_gid *sgid)
67 {
68 	struct qedr_dev *dev = get_qedr_dev(ibdev);
69 	int rc = 0;
70 
71 	if (!rdma_cap_roce_gid_table(ibdev, port))
72 		return -ENODEV;
73 
74 	rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
75 	if (rc == -EAGAIN) {
76 		memcpy(sgid, &zgid, sizeof(*sgid));
77 		return 0;
78 	}
79 
80 	DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
81 		 sgid->global.interface_id, sgid->global.subnet_prefix);
82 
83 	return rc;
84 }
85 
86 int qedr_add_gid(struct ib_device *device, u8 port_num,
87 		 unsigned int index, const union ib_gid *gid,
88 		 const struct ib_gid_attr *attr, void **context)
89 {
90 	if (!rdma_cap_roce_gid_table(device, port_num))
91 		return -EINVAL;
92 
93 	if (port_num > QEDR_MAX_PORT)
94 		return -EINVAL;
95 
96 	if (!context)
97 		return -EINVAL;
98 
99 	return 0;
100 }
101 
102 int qedr_del_gid(struct ib_device *device, u8 port_num,
103 		 unsigned int index, void **context)
104 {
105 	if (!rdma_cap_roce_gid_table(device, port_num))
106 		return -EINVAL;
107 
108 	if (port_num > QEDR_MAX_PORT)
109 		return -EINVAL;
110 
111 	if (!context)
112 		return -EINVAL;
113 
114 	return 0;
115 }
116 
117 int qedr_query_device(struct ib_device *ibdev,
118 		      struct ib_device_attr *attr, struct ib_udata *udata)
119 {
120 	struct qedr_dev *dev = get_qedr_dev(ibdev);
121 	struct qedr_device_attr *qattr = &dev->attr;
122 
123 	if (!dev->rdma_ctx) {
124 		DP_ERR(dev,
125 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
126 		       dev->rdma_ctx);
127 		return -EINVAL;
128 	}
129 
130 	memset(attr, 0, sizeof(*attr));
131 
132 	attr->fw_ver = qattr->fw_ver;
133 	attr->sys_image_guid = qattr->sys_image_guid;
134 	attr->max_mr_size = qattr->max_mr_size;
135 	attr->page_size_cap = qattr->page_size_caps;
136 	attr->vendor_id = qattr->vendor_id;
137 	attr->vendor_part_id = qattr->vendor_part_id;
138 	attr->hw_ver = qattr->hw_ver;
139 	attr->max_qp = qattr->max_qp;
140 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
141 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
142 	    IB_DEVICE_RC_RNR_NAK_GEN |
143 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
144 
145 	attr->max_sge = qattr->max_sge;
146 	attr->max_sge_rd = qattr->max_sge;
147 	attr->max_cq = qattr->max_cq;
148 	attr->max_cqe = qattr->max_cqe;
149 	attr->max_mr = qattr->max_mr;
150 	attr->max_mw = qattr->max_mw;
151 	attr->max_pd = qattr->max_pd;
152 	attr->atomic_cap = dev->atomic_cap;
153 	attr->max_fmr = qattr->max_fmr;
154 	attr->max_map_per_fmr = 16;
155 	attr->max_qp_init_rd_atom =
156 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
157 	attr->max_qp_rd_atom =
158 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
159 		attr->max_qp_init_rd_atom);
160 
161 	attr->max_srq = qattr->max_srq;
162 	attr->max_srq_sge = qattr->max_srq_sge;
163 	attr->max_srq_wr = qattr->max_srq_wr;
164 
165 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
166 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
167 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
168 	attr->max_ah = qattr->max_ah;
169 
170 	return 0;
171 }
172 
173 #define QEDR_SPEED_SDR		(1)
174 #define QEDR_SPEED_DDR		(2)
175 #define QEDR_SPEED_QDR		(4)
176 #define QEDR_SPEED_FDR10	(8)
177 #define QEDR_SPEED_FDR		(16)
178 #define QEDR_SPEED_EDR		(32)
179 
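/* Map an Ethernet link speed (in Mb/s) to the closest IB speed/width pair
 * with the same aggregate rate, e.g. 100000 Mb/s is reported as
 * EDR (25G) x 4 lanes.
 */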
180 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
181 					    u8 *ib_width)
182 {
183 	switch (speed) {
184 	case 1000:
185 		*ib_speed = QEDR_SPEED_SDR;
186 		*ib_width = IB_WIDTH_1X;
187 		break;
188 	case 10000:
189 		*ib_speed = QEDR_SPEED_QDR;
190 		*ib_width = IB_WIDTH_1X;
191 		break;
192 
193 	case 20000:
194 		*ib_speed = QEDR_SPEED_DDR;
195 		*ib_width = IB_WIDTH_4X;
196 		break;
197 
198 	case 25000:
199 		*ib_speed = QEDR_SPEED_EDR;
200 		*ib_width = IB_WIDTH_1X;
201 		break;
202 
203 	case 40000:
204 		*ib_speed = QEDR_SPEED_QDR;
205 		*ib_width = IB_WIDTH_4X;
206 		break;
207 
208 	case 50000:
209 		*ib_speed = QEDR_SPEED_QDR;
210 		*ib_width = IB_WIDTH_4X;
211 		break;
212 
213 	case 100000:
214 		*ib_speed = QEDR_SPEED_EDR;
215 		*ib_width = IB_WIDTH_4X;
216 		break;
217 
218 	default:
219 		/* Unsupported */
220 		*ib_speed = QEDR_SPEED_SDR;
221 		*ib_width = IB_WIDTH_1X;
222 	}
223 }
224 
225 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
226 {
227 	struct qedr_dev *dev;
228 	struct qed_rdma_port *rdma_port;
229 
230 	dev = get_qedr_dev(ibdev);
231 	if (port > 1) {
232 		DP_ERR(dev, "invalid_port=0x%x\n", port);
233 		return -EINVAL;
234 	}
235 
236 	if (!dev->rdma_ctx) {
237 		DP_ERR(dev, "rdma_ctx is NULL\n");
238 		return -EINVAL;
239 	}
240 
241 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
242 
243 	/* *attr is zeroed by the caller; avoid zeroing it here */
244 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
245 		attr->state = IB_PORT_ACTIVE;
246 		attr->phys_state = 5;
247 	} else {
248 		attr->state = IB_PORT_DOWN;
249 		attr->phys_state = 3;
250 	}
251 	attr->max_mtu = IB_MTU_4096;
252 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
253 	attr->lid = 0;
254 	attr->lmc = 0;
255 	attr->sm_lid = 0;
256 	attr->sm_sl = 0;
257 	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
258 	attr->gid_tbl_len = QEDR_MAX_SGID;
259 	attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
260 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
261 	attr->qkey_viol_cntr = 0;
262 	get_link_speed_and_width(rdma_port->link_speed,
263 				 &attr->active_speed, &attr->active_width);
264 	attr->max_msg_sz = rdma_port->max_msg_size;
265 	attr->max_vl_num = 4;
266 
267 	return 0;
268 }
269 
270 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
271 		     struct ib_port_modify *props)
272 {
273 	struct qedr_dev *dev;
274 
275 	dev = get_qedr_dev(ibdev);
276 	if (port > 1) {
277 		DP_ERR(dev, "invalid_port=0x%x\n", port);
278 		return -EINVAL;
279 	}
280 
281 	return 0;
282 }
283 
284 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
285 			 unsigned long len)
286 {
287 	struct qedr_mm *mm;
288 
289 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
290 	if (!mm)
291 		return -ENOMEM;
292 
293 	mm->key.phy_addr = phy_addr;
294 	/* This function might be called with a length which is not a multiple
295 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
296 	 * forces this granularity by increasing the requested size if needed.
297 	 * When qedr_mmap is called, it will search the list with the updated
298 	 * length as a key. To prevent search failures, the length is rounded up
299 	 * in advance to PAGE_SIZE.
300 	 */
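	/* For example, a 0x1100-byte doorbell area is stored with a 0x2000
	 * key length (assuming 4K pages), matching the page-granular length
	 * that qedr_mmap will later look up.
	 */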
301 	mm->key.len = roundup(len, PAGE_SIZE);
302 	INIT_LIST_HEAD(&mm->entry);
303 
304 	mutex_lock(&uctx->mm_list_lock);
305 	list_add(&mm->entry, &uctx->mm_head);
306 	mutex_unlock(&uctx->mm_list_lock);
307 
308 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
309 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
310 		 (unsigned long long)mm->key.phy_addr,
311 		 (unsigned long)mm->key.len, uctx);
312 
313 	return 0;
314 }
315 
316 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
317 			     unsigned long len)
318 {
319 	bool found = false;
320 	struct qedr_mm *mm;
321 
322 	mutex_lock(&uctx->mm_list_lock);
323 	list_for_each_entry(mm, &uctx->mm_head, entry) {
324 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
325 			continue;
326 
327 		found = true;
328 		break;
329 	}
330 	mutex_unlock(&uctx->mm_list_lock);
331 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
332 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
333 		 phy_addr, len, uctx, found);
334 
335 	return found;
336 }
337 
338 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
339 					struct ib_udata *udata)
340 {
341 	int rc;
342 	struct qedr_ucontext *ctx;
343 	struct qedr_alloc_ucontext_resp uresp;
344 	struct qedr_dev *dev = get_qedr_dev(ibdev);
345 	struct qed_rdma_add_user_out_params oparams;
346 
347 	if (!udata)
348 		return ERR_PTR(-EFAULT);
349 
350 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
351 	if (!ctx)
352 		return ERR_PTR(-ENOMEM);
353 
354 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
355 	if (rc) {
356 		DP_ERR(dev,
357 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
358 		       rc);
359 		goto err;
360 	}
361 
362 	ctx->dpi = oparams.dpi;
363 	ctx->dpi_addr = oparams.dpi_addr;
364 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
365 	ctx->dpi_size = oparams.dpi_size;
366 	INIT_LIST_HEAD(&ctx->mm_head);
367 	mutex_init(&ctx->mm_list_lock);
368 
369 	memset(&uresp, 0, sizeof(uresp));
370 
371 	uresp.db_pa = ctx->dpi_phys_addr;
372 	uresp.db_size = ctx->dpi_size;
373 	uresp.max_send_wr = dev->attr.max_sqe;
374 	uresp.max_recv_wr = dev->attr.max_rqe;
375 	uresp.max_srq_wr = dev->attr.max_srq_wr;
376 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
377 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
378 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
379 	uresp.max_cqes = QEDR_MAX_CQES;
380 
381 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
382 	if (rc)
383 		goto err;
384 
385 	ctx->dev = dev;
386 
387 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
388 	if (rc)
389 		goto err;
390 
391 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
392 		 &ctx->ibucontext);
393 	return &ctx->ibucontext;
394 
395 err:
396 	kfree(ctx);
397 	return ERR_PTR(rc);
398 }
399 
400 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
401 {
402 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
403 	struct qedr_mm *mm, *tmp;
404 	int status = 0;
405 
406 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
407 		 uctx);
408 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
409 
410 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
411 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
412 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
413 			 mm->key.phy_addr, mm->key.len, uctx);
414 		list_del(&mm->entry);
415 		kfree(mm);
416 	}
417 
418 	kfree(uctx);
419 	return status;
420 }
421 
422 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
423 {
424 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
425 	struct qedr_dev *dev = get_qedr_dev(context->device);
426 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
427 	u64 unmapped_db = dev->db_phys_addr;
428 	unsigned long len = (vma->vm_end - vma->vm_start);
429 	int rc = 0;
430 	bool found;
431 
432 	DP_DEBUG(dev, QEDR_MSG_INIT,
433 		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
434 		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
435 	if (vma->vm_start & (PAGE_SIZE - 1)) {
436 		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
437 		       vma->vm_start);
438 		return -EINVAL;
439 	}
440 
441 	found = qedr_search_mmap(ucontext, vm_page, len);
442 	if (!found) {
443 		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
444 		       vma->vm_pgoff);
445 		return -EINVAL;
446 	}
447 
448 	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
449 
450 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
451 						     dev->db_size))) {
452 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
453 		if (vma->vm_flags & VM_READ) {
454 			DP_ERR(dev, "Trying to map doorbell bar for read\n");
455 			return -EPERM;
456 		}
457 
458 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
459 
460 		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
461 					PAGE_SIZE, vma->vm_page_prot);
462 	} else {
463 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
464 		rc = remap_pfn_range(vma, vma->vm_start,
465 				     vma->vm_pgoff, len, vma->vm_page_prot);
466 	}
467 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
468 	return rc;
469 }
470 
471 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
472 			    struct ib_ucontext *context, struct ib_udata *udata)
473 {
474 	struct qedr_dev *dev = get_qedr_dev(ibdev);
475 	struct qedr_pd *pd;
476 	u16 pd_id;
477 	int rc;
478 
479 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
480 		 (udata && context) ? "User Lib" : "Kernel");
481 
482 	if (!dev->rdma_ctx) {
483 		DP_ERR(dev, "invalid RDMA context\n");
484 		return ERR_PTR(-EINVAL);
485 	}
486 
487 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
488 	if (!pd)
489 		return ERR_PTR(-ENOMEM);
490 
491 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
492 	if (rc)
493 		goto err;
494 
495 	pd->pd_id = pd_id;
496 
497 	if (udata && context) {
498 		struct qedr_alloc_pd_uresp uresp;
499 
500 		uresp.pd_id = pd_id;
501 
502 		rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
503 		if (rc) {
504 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
505 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
506 			goto err;
507 		}
508 
509 		pd->uctx = get_qedr_ucontext(context);
510 		pd->uctx->pd = pd;
511 	}
512 
513 	return &pd->ibpd;
514 
515 err:
516 	kfree(pd);
517 	return ERR_PTR(rc);
518 }
519 
520 int qedr_dealloc_pd(struct ib_pd *ibpd)
521 {
522 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
523 	struct qedr_pd *pd = get_qedr_pd(ibpd);
524 
525 	if (!pd) {
526 		pr_err("Invalid PD received in dealloc_pd\n");
527 		return -EINVAL;
528 	}
529 
530 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
531 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
532 
533 	kfree(pd);
534 
535 	return 0;
536 }
537 
538 static void qedr_free_pbl(struct qedr_dev *dev,
539 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
540 {
541 	struct pci_dev *pdev = dev->pdev;
542 	int i;
543 
544 	for (i = 0; i < pbl_info->num_pbls; i++) {
545 		if (!pbl[i].va)
546 			continue;
547 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
548 				  pbl[i].va, pbl[i].pa);
549 	}
550 
551 	kfree(pbl);
552 }
553 
554 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
555 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
556 
557 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
558 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
559 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
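/* With 64KB PBL pages, each page holds 64K / 8 = 8192 PBEs, so a two-layer
 * PBL can describe up to 8192 * 8192 = ~64M data pages (256GB assuming 4K
 * host pages).
 */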
560 
561 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
562 					   struct qedr_pbl_info *pbl_info,
563 					   gfp_t flags)
564 {
565 	struct pci_dev *pdev = dev->pdev;
566 	struct qedr_pbl *pbl_table;
567 	dma_addr_t *pbl_main_tbl;
568 	dma_addr_t pa;
569 	void *va;
570 	int i;
571 
572 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
573 	if (!pbl_table)
574 		return ERR_PTR(-ENOMEM);
575 
576 	for (i = 0; i < pbl_info->num_pbls; i++) {
577 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
578 					&pa, flags);
579 		if (!va)
580 			goto err;
581 
582 		memset(va, 0, pbl_info->pbl_size);
583 		pbl_table[i].va = va;
584 		pbl_table[i].pa = pa;
585 	}
586 
587 	/* Two-layer PBLs: if we have more than one PBL, initialize the first
588 	 * one with the physical (DMA) addresses of all of the rest.
589 	 */
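	/* For example, with num_pbls == 3, pbl_table[0] ends up holding the
	 * DMA addresses of pbl_table[1] and pbl_table[2], which hold the
	 * actual PBEs.
	 */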
590 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
591 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
592 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
593 
594 	return pbl_table;
595 
596 err:
597 	for (i--; i >= 0; i--)
598 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
599 				  pbl_table[i].va, pbl_table[i].pa);
600 
601 	qedr_free_pbl(dev, pbl_info, pbl_table);
602 
603 	return ERR_PTR(-ENOMEM);
604 }
605 
606 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
607 				struct qedr_pbl_info *pbl_info,
608 				u32 num_pbes, int two_layer_capable)
609 {
610 	u32 pbl_capacity;
611 	u32 pbl_size;
612 	u32 num_pbls;
613 
614 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
615 		if (num_pbes > MAX_PBES_TWO_LAYER) {
616 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
617 			       num_pbes);
618 			return -EINVAL;
619 		}
620 
621 		/* calculate required pbl page size */
622 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
623 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
624 			       NUM_PBES_ON_PAGE(pbl_size);
625 
626 		while (pbl_capacity < num_pbes) {
627 			pbl_size *= 2;
628 			pbl_capacity = pbl_size / sizeof(u64);
629 			pbl_capacity = pbl_capacity * pbl_capacity;
630 		}
631 
632 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
633 		num_pbls++;	/* One for layer 0 (points to the other PBLs) */
634 		pbl_info->two_layered = true;
635 	} else {
636 		/* One layered PBL */
637 		num_pbls = 1;
638 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
639 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
640 		pbl_info->two_layered = false;
641 	}
642 
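	/* Example: num_pbes = 10000 with two_layer_capable set exceeds the 8192
	 * PBEs of a single 64KB page; a 4KB PBL page already suffices
	 * (512 * 512 = 262144 >= 10000), so we end up with
	 * DIV_ROUND_UP(10000, 512) + 1 = 21 PBLs.
	 */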
643 	pbl_info->num_pbls = num_pbls;
644 	pbl_info->pbl_size = pbl_size;
645 	pbl_info->num_pbes = num_pbes;
646 
647 	DP_DEBUG(dev, QEDR_MSG_MR,
648 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
649 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
650 
651 	return 0;
652 }
653 
654 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
655 			       struct qedr_pbl *pbl,
656 			       struct qedr_pbl_info *pbl_info)
657 {
658 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
659 	struct qedr_pbl *pbl_tbl;
660 	struct scatterlist *sg;
661 	struct regpair *pbe;
662 	int entry;
663 	u32 addr;
664 
665 	if (!pbl_info->num_pbes)
666 		return;
667 
668 	/* If we have a two-layered PBL, the first PBL points to the rest of
669 	 * the PBLs and the first data entry lies in the second PBL of the table.
670 	 */
671 	if (pbl_info->two_layered)
672 		pbl_tbl = &pbl[1];
673 	else
674 		pbl_tbl = pbl;
675 
676 	pbe = (struct regpair *)pbl_tbl->va;
677 	if (!pbe) {
678 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
679 		return;
680 	}
681 
682 	pbe_cnt = 0;
683 
684 	shift = umem->page_shift;
685 
686 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
687 		pages = sg_dma_len(sg) >> shift;
688 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
689 			/* store the page address in pbe */
690 			pbe->lo = cpu_to_le32(sg_dma_address(sg) +
691 					      (pg_cnt << shift));
692 			addr = upper_32_bits(sg_dma_address(sg) +
693 					     (pg_cnt << shift));
694 			pbe->hi = cpu_to_le32(addr);
695 			pbe_cnt++;
696 			total_num_pbes++;
697 			pbe++;
698 
699 			if (total_num_pbes == pbl_info->num_pbes)
700 				return;
701 
702 			/* If the current PBL is full of PBEs,
703 			 * move on to the next PBL.
704 			 */
705 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
706 				pbl_tbl++;
707 				pbe = (struct regpair *)pbl_tbl->va;
708 				pbe_cnt = 0;
709 			}
710 		}
711 	}
712 }
713 
714 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
715 			      struct qedr_cq *cq, struct ib_udata *udata)
716 {
717 	struct qedr_create_cq_uresp uresp;
718 	int rc;
719 
720 	memset(&uresp, 0, sizeof(uresp));
721 
722 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
723 	uresp.icid = cq->icid;
724 
725 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
726 	if (rc)
727 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
728 
729 	return rc;
730 }
731 
732 static void consume_cqe(struct qedr_cq *cq)
733 {
734 	if (cq->latest_cqe == cq->toggle_cqe)
735 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
736 
737 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
738 }
739 
740 static inline int qedr_align_cq_entries(int entries)
741 {
742 	u64 size, aligned_size;
743 
744 	/* We allocate an extra entry that we don't report to the FW. */
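	/* The chain is allocated in whole pages, so also round the count up to
	 * fill the last page; e.g. assuming 32-byte CQEs and 4K pages, a
	 * request for 200 entries yields a 256-entry chain.
	 */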
745 	size = (entries + 1) * QEDR_CQE_SIZE;
746 	aligned_size = ALIGN(size, PAGE_SIZE);
747 
748 	return aligned_size / QEDR_CQE_SIZE;
749 }
750 
751 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
752 				       struct qedr_dev *dev,
753 				       struct qedr_userq *q,
754 				       u64 buf_addr, size_t buf_len,
755 				       int access, int dmasync)
756 {
757 	int page_cnt;
758 	int rc;
759 
760 	q->buf_addr = buf_addr;
761 	q->buf_len = buf_len;
762 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
763 	if (IS_ERR(q->umem)) {
764 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
765 		       PTR_ERR(q->umem));
766 		return PTR_ERR(q->umem);
767 	}
768 
769 	page_cnt = ib_umem_page_count(q->umem);
770 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
771 	if (rc)
772 		goto err0;
773 
774 	q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
775 	if (IS_ERR(q->pbl_tbl)) {
776 		rc = PTR_ERR(q->pbl_tbl);
777 		goto err0;
778 	}
779 
780 	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
781 
782 	return 0;
783 
784 err0:
785 	ib_umem_release(q->umem);
786 
787 	return rc;
788 }
789 
790 static inline void qedr_init_cq_params(struct qedr_cq *cq,
791 				       struct qedr_ucontext *ctx,
792 				       struct qedr_dev *dev, int vector,
793 				       int chain_entries, int page_cnt,
794 				       u64 pbl_ptr,
795 				       struct qed_rdma_create_cq_in_params
796 				       *params)
797 {
798 	memset(params, 0, sizeof(*params));
799 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
800 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
801 	params->cnq_id = vector;
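	/* One chain entry is kept as a driver-only spare (see
	 * qedr_align_cq_entries), so report one entry less to the FW.
	 */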
802 	params->cq_size = chain_entries - 1;
803 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
804 	params->pbl_num_pages = page_cnt;
805 	params->pbl_ptr = pbl_ptr;
806 	params->pbl_two_level = 0;
807 }
808 
809 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
810 {
811 	/* Flush data before signalling doorbell */
812 	wmb();
813 	cq->db.data.agg_flags = flags;
814 	cq->db.data.value = cpu_to_le32(cons);
815 	writeq(cq->db.raw, cq->db_addr);
816 
817 	/* Make sure write would stick */
818 	mmiowb();
819 }
820 
821 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
822 {
823 	struct qedr_cq *cq = get_qedr_cq(ibcq);
824 	unsigned long sflags;
825 	struct qedr_dev *dev;
826 
827 	dev = get_qedr_dev(ibcq->device);
828 
829 	if (cq->destroyed) {
830 		DP_ERR(dev,
831 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
832 		       cq, cq->icid);
833 		return -EINVAL;
834 	}
835 
836 
837 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
838 		return 0;
839 
840 	spin_lock_irqsave(&cq->cq_lock, sflags);
841 
842 	cq->arm_flags = 0;
843 
844 	if (flags & IB_CQ_SOLICITED)
845 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
846 
847 	if (flags & IB_CQ_NEXT_COMP)
848 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
849 
850 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
851 
852 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
853 
854 	return 0;
855 }
856 
857 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
858 			     const struct ib_cq_init_attr *attr,
859 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
860 {
861 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
862 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
863 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
864 	struct qedr_dev *dev = get_qedr_dev(ibdev);
865 	struct qed_rdma_create_cq_in_params params;
866 	struct qedr_create_cq_ureq ureq;
867 	int vector = attr->comp_vector;
868 	int entries = attr->cqe;
869 	struct qedr_cq *cq;
870 	int chain_entries;
871 	int page_cnt;
872 	u64 pbl_ptr;
873 	u16 icid;
874 	int rc;
875 
876 	DP_DEBUG(dev, QEDR_MSG_INIT,
877 		 "create_cq: called from %s. entries=%d, vector=%d\n",
878 		 udata ? "User Lib" : "Kernel", entries, vector);
879 
880 	if (entries > QEDR_MAX_CQES) {
881 		DP_ERR(dev,
882 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
883 		       entries, QEDR_MAX_CQES);
884 		return ERR_PTR(-EINVAL);
885 	}
886 
887 	chain_entries = qedr_align_cq_entries(entries);
888 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
889 
890 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
891 	if (!cq)
892 		return ERR_PTR(-ENOMEM);
893 
894 	if (udata) {
895 		memset(&ureq, 0, sizeof(ureq));
896 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
897 			DP_ERR(dev,
898 			       "create cq: problem copying data from user space\n");
899 			goto err0;
900 		}
901 
902 		if (!ureq.len) {
903 			DP_ERR(dev,
904 			       "create cq: cannot create a cq with 0 entries\n");
905 			goto err0;
906 		}
907 
908 		cq->cq_type = QEDR_CQ_TYPE_USER;
909 
910 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
911 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
912 		if (rc)
913 			goto err0;
914 
915 		pbl_ptr = cq->q.pbl_tbl->pa;
916 		page_cnt = cq->q.pbl_info.num_pbes;
917 
918 		cq->ibcq.cqe = chain_entries;
919 	} else {
920 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
921 
922 		rc = dev->ops->common->chain_alloc(dev->cdev,
923 						   QED_CHAIN_USE_TO_CONSUME,
924 						   QED_CHAIN_MODE_PBL,
925 						   QED_CHAIN_CNT_TYPE_U32,
926 						   chain_entries,
927 						   sizeof(union rdma_cqe),
928 						   &cq->pbl);
929 		if (rc)
930 			goto err1;
931 
932 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
933 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
934 		cq->ibcq.cqe = cq->pbl.capacity;
935 	}
936 
937 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
938 			    pbl_ptr, &params);
939 
940 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
941 	if (rc)
942 		goto err2;
943 
944 	cq->icid = icid;
945 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
946 	spin_lock_init(&cq->cq_lock);
947 
948 	if (ib_ctx) {
949 		rc = qedr_copy_cq_uresp(dev, cq, udata);
950 		if (rc)
951 			goto err3;
952 	} else {
953 		/* Generate doorbell address. */
954 		cq->db_addr = dev->db_addr +
955 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
956 		cq->db.data.icid = cq->icid;
957 		cq->db.data.params = DB_AGG_CMD_SET <<
958 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
959 
960 		/* Point to the very last element; once we pass it, we toggle */
961 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
962 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
963 		cq->latest_cqe = NULL;
964 		consume_cqe(cq);
965 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
966 	}
967 
968 	DP_DEBUG(dev, QEDR_MSG_CQ,
969 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
970 		 cq->icid, cq, params.cq_size);
971 
972 	return &cq->ibcq;
973 
974 err3:
975 	destroy_iparams.icid = cq->icid;
976 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
977 				  &destroy_oparams);
978 err2:
979 	if (udata)
980 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
981 	else
982 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
983 err1:
984 	if (udata)
985 		ib_umem_release(cq->q.umem);
986 err0:
987 	kfree(cq);
988 	return ERR_PTR(-EINVAL);
989 }
990 
991 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
992 {
993 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
994 	struct qedr_cq *cq = get_qedr_cq(ibcq);
995 
996 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
997 
998 	return 0;
999 }
1000 
1001 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1002 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1003 
1004 int qedr_destroy_cq(struct ib_cq *ibcq)
1005 {
1006 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1007 	struct qed_rdma_destroy_cq_out_params oparams;
1008 	struct qed_rdma_destroy_cq_in_params iparams;
1009 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1010 	int iter;
1011 	int rc;
1012 
1013 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1014 
1015 	cq->destroyed = 1;
1016 
1017 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1018 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1019 		goto done;
1020 
1021 	iparams.icid = cq->icid;
1022 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1023 	if (rc)
1024 		return rc;
1025 
1026 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1027 
1028 	if (ibcq->uobject && ibcq->uobject->context) {
1029 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1030 		ib_umem_release(cq->q.umem);
1031 	}
1032 
1033 	/* We don't want the IRQ handler to handle a non-existing CQ, so we
1034 	 * wait until all CNQ interrupts, if any, have been received. This
1035 	 * always happens, and very quickly; if it doesn't, a serious error
1036 	 * has occurred, which is why we can afford a long delay.
1037 	 * We spin for a short time so we don't lose time on context switching
1038 	 * in case all the completions are handled within that span. Otherwise
1039 	 * we sleep for a while and check again. Since the CNQ may be
1040 	 * associated with (only) the current CPU, we use msleep to allow the
1041 	 * current CPU to be freed.
1042 	 * The CNQ notification count is increased in qedr_irq_handler().
1043 	 */
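	/* Worst case this waits QEDR_DESTROY_CQ_MAX_ITERATIONS spins of
	 * QEDR_DESTROY_CQ_ITER_DURATION usecs plus the same number of msleep()
	 * periods, i.e. roughly 100us of spinning and 100ms of sleeping.
	 */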
1044 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1045 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1046 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1047 		iter--;
1048 	}
1049 
1050 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1051 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1052 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1053 		iter--;
1054 	}
1055 
1056 	if (oparams.num_cq_notif != cq->cnq_notif)
1057 		goto err;
1058 
1059 	/* Note that we don't need to have explicit code to wait for the
1060 	 * completion of the event handler because it is invoked from the EQ.
1061 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1062 	 * be certain that there's no event handler in process.
1063 	 */
1064 done:
1065 	cq->sig = ~cq->sig;
1066 
1067 	kfree(cq);
1068 
1069 	return 0;
1070 
1071 err:
1072 	DP_ERR(dev,
1073 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1074 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1075 
1076 	return -EINVAL;
1077 }
1078 
1079 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1080 					  struct ib_qp_attr *attr,
1081 					  int attr_mask,
1082 					  struct qed_rdma_modify_qp_in_params
1083 					  *qp_params)
1084 {
1085 	enum rdma_network_type nw_type;
1086 	struct ib_gid_attr gid_attr;
1087 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1088 	union ib_gid gid;
1089 	u32 ipv4_addr;
1090 	int rc = 0;
1091 	int i;
1092 
1093 	rc = ib_get_cached_gid(ibqp->device,
1094 			       rdma_ah_get_port_num(&attr->ah_attr),
1095 			       grh->sgid_index, &gid, &gid_attr);
1096 	if (rc)
1097 		return rc;
1098 
1099 	if (!memcmp(&gid, &zgid, sizeof(gid)))
1100 		return -ENOENT;
1101 
1102 	if (gid_attr.ndev) {
1103 		qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1104 
1105 		dev_put(gid_attr.ndev);
1106 		nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1107 		switch (nw_type) {
1108 		case RDMA_NETWORK_IPV6:
1109 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1110 			       sizeof(qp_params->sgid));
1111 			memcpy(&qp_params->dgid.bytes[0],
1112 			       &grh->dgid,
1113 			       sizeof(qp_params->dgid));
1114 			qp_params->roce_mode = ROCE_V2_IPV6;
1115 			SET_FIELD(qp_params->modify_flags,
1116 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1117 			break;
1118 		case RDMA_NETWORK_IB:
1119 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1120 			       sizeof(qp_params->sgid));
1121 			memcpy(&qp_params->dgid.bytes[0],
1122 			       &grh->dgid,
1123 			       sizeof(qp_params->dgid));
1124 			qp_params->roce_mode = ROCE_V1;
1125 			break;
1126 		case RDMA_NETWORK_IPV4:
1127 			memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1128 			memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1129 			ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1130 			qp_params->sgid.ipv4_addr = ipv4_addr;
1131 			ipv4_addr =
1132 			    qedr_get_ipv4_from_gid(grh->dgid.raw);
1133 			qp_params->dgid.ipv4_addr = ipv4_addr;
1134 			SET_FIELD(qp_params->modify_flags,
1135 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1136 			qp_params->roce_mode = ROCE_V2_IPV4;
1137 			break;
1138 		}
1139 	}
1140 
1141 	for (i = 0; i < 4; i++) {
1142 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1143 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1144 	}
1145 
1146 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1147 		qp_params->vlan_id = 0;
1148 
1149 	return 0;
1150 }
1151 
1152 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1153 			       struct ib_qp_init_attr *attrs)
1154 {
1155 	struct qedr_device_attr *qattr = &dev->attr;
1156 
1157 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1158 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1159 		DP_DEBUG(dev, QEDR_MSG_QP,
1160 			 "create qp: unsupported qp type=0x%x requested\n",
1161 			 attrs->qp_type);
1162 		return -EINVAL;
1163 	}
1164 
1165 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1166 		DP_ERR(dev,
1167 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1168 		       attrs->cap.max_send_wr, qattr->max_sqe);
1169 		return -EINVAL;
1170 	}
1171 
1172 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1173 		DP_ERR(dev,
1174 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1175 		       attrs->cap.max_inline_data, qattr->max_inline);
1176 		return -EINVAL;
1177 	}
1178 
1179 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1180 		DP_ERR(dev,
1181 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1182 		       attrs->cap.max_send_sge, qattr->max_sge);
1183 		return -EINVAL;
1184 	}
1185 
1186 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1187 		DP_ERR(dev,
1188 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1189 		       attrs->cap.max_recv_sge, qattr->max_sge);
1190 		return -EINVAL;
1191 	}
1192 
1193 	/* Unprivileged user space cannot create special QP */
1194 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1195 		DP_ERR(dev,
1196 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1197 		       attrs->qp_type);
1198 		return -EINVAL;
1199 	}
1200 
1201 	return 0;
1202 }
1203 
1204 static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
1205 			       struct qedr_qp *qp)
1206 {
1207 	uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1208 	uresp->rq_icid = qp->icid;
1209 }
1210 
1211 static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
1212 			       struct qedr_qp *qp)
1213 {
1214 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1215 	uresp->sq_icid = qp->icid + 1;
1216 }
1217 
1218 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1219 			      struct qedr_qp *qp, struct ib_udata *udata)
1220 {
1221 	struct qedr_create_qp_uresp uresp;
1222 	int rc;
1223 
1224 	memset(&uresp, 0, sizeof(uresp));
1225 	qedr_copy_sq_uresp(&uresp, qp);
1226 	qedr_copy_rq_uresp(&uresp, qp);
1227 
1228 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1229 	uresp.qp_id = qp->qp_id;
1230 
1231 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1232 	if (rc)
1233 		DP_ERR(dev,
1234 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1235 		       qp->icid);
1236 
1237 	return rc;
1238 }
1239 
1240 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1241 				      struct qedr_qp *qp,
1242 				      struct qedr_pd *pd,
1243 				      struct ib_qp_init_attr *attrs)
1244 {
1245 	spin_lock_init(&qp->q_lock);
1246 	qp->pd = pd;
1247 	qp->qp_type = attrs->qp_type;
1248 	qp->max_inline_data = attrs->cap.max_inline_data;
1249 	qp->sq.max_sges = attrs->cap.max_send_sge;
1250 	qp->state = QED_ROCE_QP_STATE_RESET;
1251 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1252 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1253 	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1254 	qp->dev = dev;
1255 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1256 
1257 	DP_DEBUG(dev, QEDR_MSG_QP,
1258 		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1259 		 qp->rq.max_sges, qp->rq_cq->icid);
1260 	DP_DEBUG(dev, QEDR_MSG_QP,
1261 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1262 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1263 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1264 	DP_DEBUG(dev, QEDR_MSG_QP,
1265 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1266 		 qp->sq.max_sges, qp->sq_cq->icid);
1267 }
1268 
1269 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1270 {
1271 	qp->sq.db = dev->db_addr +
1272 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1273 	qp->sq.db_data.data.icid = qp->icid + 1;
1274 	qp->rq.db = dev->db_addr +
1275 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1276 	qp->rq.db_data.data.icid = qp->icid;
1277 }
1278 
1279 static inline void
1280 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1281 			      struct qedr_pd *pd,
1282 			      struct qedr_qp *qp,
1283 			      struct ib_qp_init_attr *attrs,
1284 			      bool fmr_and_reserved_lkey,
1285 			      struct qed_rdma_create_qp_in_params *params)
1286 {
1287 	/* QP handle to be written in an async event */
1288 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1289 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1290 
1291 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1292 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1293 	params->pd = pd->pd_id;
1294 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1295 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1296 	params->stats_queue = 0;
1297 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1298 	params->srq_id = 0;
1299 	params->use_srq = false;
1300 }
1301 
1302 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1303 {
1304 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1305 		 "qp=%p. "
1306 		 "sq_addr=0x%llx, "
1307 		 "sq_len=%zd, "
1308 		 "rq_addr=0x%llx, "
1309 		 "rq_len=%zd"
1310 		 "\n",
1311 		 qp,
1312 		 qp->usq.buf_addr,
1313 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1314 }
1315 
1316 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1317 {
1318 	if (qp->usq.umem)
1319 		ib_umem_release(qp->usq.umem);
1320 	qp->usq.umem = NULL;
1321 
1322 	if (qp->urq.umem)
1323 		ib_umem_release(qp->urq.umem);
1324 	qp->urq.umem = NULL;
1325 }
1326 
1327 static int qedr_create_user_qp(struct qedr_dev *dev,
1328 			       struct qedr_qp *qp,
1329 			       struct ib_pd *ibpd,
1330 			       struct ib_udata *udata,
1331 			       struct ib_qp_init_attr *attrs)
1332 {
1333 	struct qed_rdma_create_qp_in_params in_params;
1334 	struct qed_rdma_create_qp_out_params out_params;
1335 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1336 	struct ib_ucontext *ib_ctx = NULL;
1337 	struct qedr_ucontext *ctx = NULL;
1338 	struct qedr_create_qp_ureq ureq;
1339 	int rc = -EINVAL;
1340 
1341 	ib_ctx = ibpd->uobject->context;
1342 	ctx = get_qedr_ucontext(ib_ctx);
1343 
1344 	memset(&ureq, 0, sizeof(ureq));
1345 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1346 	if (rc) {
1347 		DP_ERR(dev, "Problem copying data from user space\n");
1348 		return rc;
1349 	}
1350 
1351 	/* SQ - read access only (0), dma sync not required (0) */
1352 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1353 				  ureq.sq_len, 0, 0);
1354 	if (rc)
1355 		return rc;
1356 
1357 	/* RQ - read access only (0), dma sync not required (0) */
1358 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1359 				  ureq.rq_len, 0, 0);
1360 
1361 	if (rc)
1362 		return rc;
1363 
1364 	memset(&in_params, 0, sizeof(in_params));
1365 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1366 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1367 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1368 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1369 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1370 	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1371 	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1372 
1373 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1374 					      &in_params, &out_params);
1375 
1376 	if (!qp->qed_qp) {
1377 		rc = -ENOMEM;
1378 		goto err1;
1379 	}
1380 
1381 	qp->qp_id = out_params.qp_id;
1382 	qp->icid = out_params.icid;
1383 
1384 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1385 	if (rc)
1386 		goto err;
1387 
1388 	qedr_qp_user_print(dev, qp);
1389 
1390 	return 0;
1391 err:
1392 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1393 	if (rc)
1394 		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1395 
1396 err1:
1397 	qedr_cleanup_user(dev, qp);
1398 	return rc;
1399 }
1400 
1401 static int
1402 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1403 			   struct qedr_qp *qp,
1404 			   struct qed_rdma_create_qp_in_params *in_params,
1405 			   u32 n_sq_elems, u32 n_rq_elems)
1406 {
1407 	struct qed_rdma_create_qp_out_params out_params;
1408 	int rc;
1409 
1410 	rc = dev->ops->common->chain_alloc(dev->cdev,
1411 					   QED_CHAIN_USE_TO_PRODUCE,
1412 					   QED_CHAIN_MODE_PBL,
1413 					   QED_CHAIN_CNT_TYPE_U32,
1414 					   n_sq_elems,
1415 					   QEDR_SQE_ELEMENT_SIZE,
1416 					   &qp->sq.pbl);
1417 
1418 	if (rc)
1419 		return rc;
1420 
1421 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1422 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1423 
1424 	rc = dev->ops->common->chain_alloc(dev->cdev,
1425 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1426 					   QED_CHAIN_MODE_PBL,
1427 					   QED_CHAIN_CNT_TYPE_U32,
1428 					   n_rq_elems,
1429 					   QEDR_RQE_ELEMENT_SIZE,
1430 					   &qp->rq.pbl);
1431 	if (rc)
1432 		return rc;
1433 
1434 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1435 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1436 
1437 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1438 					      in_params, &out_params);
1439 
1440 	if (!qp->qed_qp)
1441 		return -EINVAL;
1442 
1443 	qp->qp_id = out_params.qp_id;
1444 	qp->icid = out_params.icid;
1445 
1446 	qedr_set_roce_db_info(dev, qp);
1447 
1448 	return 0;
1449 }
1450 
1451 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1452 {
1453 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1454 	kfree(qp->wqe_wr_id);
1455 
1456 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1457 	kfree(qp->rqe_wr_id);
1458 }
1459 
1460 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1461 				 struct qedr_qp *qp,
1462 				 struct ib_pd *ibpd,
1463 				 struct ib_qp_init_attr *attrs)
1464 {
1465 	struct qed_rdma_create_qp_in_params in_params;
1466 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1467 	int rc = -EINVAL;
1468 	u32 n_rq_elems;
1469 	u32 n_sq_elems;
1470 	u32 n_sq_entries;
1471 
1472 	memset(&in_params, 0, sizeof(in_params));
1473 
1474 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1475 	 * the ring. The ring should allow at least a single WR, even if the
1476 	 * user requested none, due to allocation issues.
1477 	 * We should add an extra WR since the prod and cons indices of
1478 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1479 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1480 	 * double the number of entries due an iSER issue that pushes far more
1481 	 * double the number of entries due to an iSER issue that pushes far more
1482 	 * WRs than indicated. If we decline its ib_post_send() then we get
1483 	 * error prints in dmesg that we'd like to avoid.
1484 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1485 			      dev->attr.max_sqe);
1486 
1487 	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
1488 				GFP_KERNEL);
1489 	if (!qp->wqe_wr_id) {
1490 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1491 		return -ENOMEM;
1492 	}
1493 
1494 	/* QP handle to be written in CQE */
1495 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1496 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1497 
1498 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1499 	 * the ring. The ring should allow at least a single WR, even if the
1500 	 * user requested none, due to allocation issues.
1501 	 */
1502 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1503 
1504 	/* Allocate driver internal RQ array */
1505 	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
1506 				GFP_KERNEL);
1507 	if (!qp->rqe_wr_id) {
1508 		DP_ERR(dev,
1509 		       "create qp: failed RQ shadow memory allocation\n");
1510 		kfree(qp->wqe_wr_id);
1511 		return -ENOMEM;
1512 	}
1513 
1514 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1515 
1516 	n_sq_entries = attrs->cap.max_send_wr;
1517 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1518 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1519 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1520 
1521 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1522 
1523 	rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1524 					n_sq_elems, n_rq_elems);
1525 	if (rc)
1526 		qedr_cleanup_kernel(dev, qp);
1527 
1528 	return rc;
1529 }
1530 
1531 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1532 			     struct ib_qp_init_attr *attrs,
1533 			     struct ib_udata *udata)
1534 {
1535 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1536 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1537 	struct qedr_qp *qp;
1538 	struct ib_qp *ibqp;
1539 	int rc = 0;
1540 
1541 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1542 		 udata ? "user library" : "kernel", pd);
1543 
1544 	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1545 	if (rc)
1546 		return ERR_PTR(rc);
1547 
1548 	if (attrs->srq)
1549 		return ERR_PTR(-EINVAL);
1550 
1551 	DP_DEBUG(dev, QEDR_MSG_QP,
1552 		 "create qp: called from %s, event_handler=%p, pd=%p, sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1553 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1554 		 get_qedr_cq(attrs->send_cq),
1555 		 get_qedr_cq(attrs->send_cq)->icid,
1556 		 get_qedr_cq(attrs->recv_cq),
1557 		 get_qedr_cq(attrs->recv_cq)->icid);
1558 
1559 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1560 	if (!qp) {
1561 		DP_ERR(dev, "create qp: failed allocating memory\n");
1562 		return ERR_PTR(-ENOMEM);
1563 	}
1564 
1565 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1566 
1567 	if (attrs->qp_type == IB_QPT_GSI) {
1568 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1569 		if (IS_ERR(ibqp))
1570 			kfree(qp);
1571 		return ibqp;
1572 	}
1573 
1574 	if (udata)
1575 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1576 	else
1577 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1578 
1579 	if (rc)
1580 		goto err;
1581 
1582 	qp->ibqp.qp_num = qp->qp_id;
1583 
1584 	return &qp->ibqp;
1585 
1586 err:
1587 	kfree(qp);
1588 
1589 	return ERR_PTR(-EFAULT);
1590 }
1591 
1592 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1593 {
1594 	switch (qp_state) {
1595 	case QED_ROCE_QP_STATE_RESET:
1596 		return IB_QPS_RESET;
1597 	case QED_ROCE_QP_STATE_INIT:
1598 		return IB_QPS_INIT;
1599 	case QED_ROCE_QP_STATE_RTR:
1600 		return IB_QPS_RTR;
1601 	case QED_ROCE_QP_STATE_RTS:
1602 		return IB_QPS_RTS;
1603 	case QED_ROCE_QP_STATE_SQD:
1604 		return IB_QPS_SQD;
1605 	case QED_ROCE_QP_STATE_ERR:
1606 		return IB_QPS_ERR;
1607 	case QED_ROCE_QP_STATE_SQE:
1608 		return IB_QPS_SQE;
1609 	}
1610 	return IB_QPS_ERR;
1611 }
1612 
1613 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1614 					enum ib_qp_state qp_state)
1615 {
1616 	switch (qp_state) {
1617 	case IB_QPS_RESET:
1618 		return QED_ROCE_QP_STATE_RESET;
1619 	case IB_QPS_INIT:
1620 		return QED_ROCE_QP_STATE_INIT;
1621 	case IB_QPS_RTR:
1622 		return QED_ROCE_QP_STATE_RTR;
1623 	case IB_QPS_RTS:
1624 		return QED_ROCE_QP_STATE_RTS;
1625 	case IB_QPS_SQD:
1626 		return QED_ROCE_QP_STATE_SQD;
1627 	case IB_QPS_ERR:
1628 		return QED_ROCE_QP_STATE_ERR;
1629 	default:
1630 		return QED_ROCE_QP_STATE_ERR;
1631 	}
1632 }
1633 
1634 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1635 {
1636 	qed_chain_reset(&qph->pbl);
1637 	qph->prod = 0;
1638 	qph->cons = 0;
1639 	qph->wqe_cons = 0;
1640 	qph->db_data.data.value = cpu_to_le16(0);
1641 }
1642 
1643 static int qedr_update_qp_state(struct qedr_dev *dev,
1644 				struct qedr_qp *qp,
1645 				enum qed_roce_qp_state new_state)
1646 {
1647 	int status = 0;
1648 
1649 	if (new_state == qp->state)
1650 		return 0;
1651 
1652 	switch (qp->state) {
1653 	case QED_ROCE_QP_STATE_RESET:
1654 		switch (new_state) {
1655 		case QED_ROCE_QP_STATE_INIT:
1656 			qp->prev_wqe_size = 0;
1657 			qedr_reset_qp_hwq_info(&qp->sq);
1658 			qedr_reset_qp_hwq_info(&qp->rq);
1659 			break;
1660 		default:
1661 			status = -EINVAL;
1662 			break;
1663 		}
1664 		break;
1665 	case QED_ROCE_QP_STATE_INIT:
1666 		switch (new_state) {
1667 		case QED_ROCE_QP_STATE_RTR:
1668 			/* Update doorbell (in case post_recv was
1669 			 * done before move to RTR)
1670 			 */
1671 			wmb();
1672 			writel(qp->rq.db_data.raw, qp->rq.db);
1673 			/* Make sure write takes effect */
1674 			mmiowb();
1675 			break;
1676 		case QED_ROCE_QP_STATE_ERR:
1677 			break;
1678 		default:
1679 			/* Invalid state change. */
1680 			status = -EINVAL;
1681 			break;
1682 		}
1683 		break;
1684 	case QED_ROCE_QP_STATE_RTR:
1685 		/* RTR->XXX */
1686 		switch (new_state) {
1687 		case QED_ROCE_QP_STATE_RTS:
1688 			break;
1689 		case QED_ROCE_QP_STATE_ERR:
1690 			break;
1691 		default:
1692 			/* Invalid state change. */
1693 			status = -EINVAL;
1694 			break;
1695 		}
1696 		break;
1697 	case QED_ROCE_QP_STATE_RTS:
1698 		/* RTS->XXX */
1699 		switch (new_state) {
1700 		case QED_ROCE_QP_STATE_SQD:
1701 			break;
1702 		case QED_ROCE_QP_STATE_ERR:
1703 			break;
1704 		default:
1705 			/* Invalid state change. */
1706 			status = -EINVAL;
1707 			break;
1708 		}
1709 		break;
1710 	case QED_ROCE_QP_STATE_SQD:
1711 		/* SQD->XXX */
1712 		switch (new_state) {
1713 		case QED_ROCE_QP_STATE_RTS:
1714 		case QED_ROCE_QP_STATE_ERR:
1715 			break;
1716 		default:
1717 			/* Invalid state change. */
1718 			status = -EINVAL;
1719 			break;
1720 		}
1721 		break;
1722 	case QED_ROCE_QP_STATE_ERR:
1723 		/* ERR->XXX */
1724 		switch (new_state) {
1725 		case QED_ROCE_QP_STATE_RESET:
1726 			if ((qp->rq.prod != qp->rq.cons) ||
1727 			    (qp->sq.prod != qp->sq.cons)) {
1728 				DP_NOTICE(dev,
1729 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1730 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1731 					  qp->sq.cons);
1732 				status = -EINVAL;
1733 			}
1734 			break;
1735 		default:
1736 			status = -EINVAL;
1737 			break;
1738 		}
1739 		break;
1740 	default:
1741 		status = -EINVAL;
1742 		break;
1743 	}
1744 
1745 	return status;
1746 }
1747 
1748 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1749 		   int attr_mask, struct ib_udata *udata)
1750 {
1751 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1752 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1753 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1754 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1755 	enum ib_qp_state old_qp_state, new_qp_state;
1756 	int rc = 0;
1757 
1758 	DP_DEBUG(dev, QEDR_MSG_QP,
1759 		 "modify qp: qp %p attr_mask=0x%x, state=%d\n", qp, attr_mask,
1760 		 attr->qp_state);
1761 
1762 	old_qp_state = qedr_get_ibqp_state(qp->state);
1763 	if (attr_mask & IB_QP_STATE)
1764 		new_qp_state = attr->qp_state;
1765 	else
1766 		new_qp_state = old_qp_state;
1767 
1768 	if (!ib_modify_qp_is_ok
1769 	    (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
1770 	     IB_LINK_LAYER_ETHERNET)) {
1771 		DP_ERR(dev,
1772 		       "modify qp: invalid attribute mask=0x%x specified for\n"
1773 		       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1774 		       attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
1775 		       new_qp_state);
1776 		rc = -EINVAL;
1777 		goto err;
1778 	}
1779 
1780 	/* Translate the masks... */
1781 	if (attr_mask & IB_QP_STATE) {
1782 		SET_FIELD(qp_params.modify_flags,
1783 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1784 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1785 	}
1786 
1787 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1788 		qp_params.sqd_async = true;
1789 
1790 	if (attr_mask & IB_QP_PKEY_INDEX) {
1791 		SET_FIELD(qp_params.modify_flags,
1792 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1793 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1794 			rc = -EINVAL;
1795 			goto err;
1796 		}
1797 
1798 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1799 	}
1800 
1801 	if (attr_mask & IB_QP_QKEY)
1802 		qp->qkey = attr->qkey;
1803 
1804 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1805 		SET_FIELD(qp_params.modify_flags,
1806 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1807 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1808 						  IB_ACCESS_REMOTE_READ;
1809 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1810 						   IB_ACCESS_REMOTE_WRITE;
1811 		qp_params.incoming_atomic_en = attr->qp_access_flags &
1812 					       IB_ACCESS_REMOTE_ATOMIC;
1813 	}
1814 
1815 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1816 		if (attr_mask & IB_QP_PATH_MTU) {
1817 			if (attr->path_mtu < IB_MTU_256 ||
1818 			    attr->path_mtu > IB_MTU_4096) {
1819 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1820 				rc = -EINVAL;
1821 				goto err;
1822 			}
1823 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1824 				      ib_mtu_enum_to_int(iboe_get_mtu
1825 							 (dev->ndev->mtu)));
1826 		}
1827 
1828 		if (!qp->mtu) {
1829 			qp->mtu =
1830 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1831 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1832 		}
1833 
1834 		SET_FIELD(qp_params.modify_flags,
1835 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1836 
1837 		qp_params.traffic_class_tos = grh->traffic_class;
1838 		qp_params.flow_label = grh->flow_label;
1839 		qp_params.hop_limit_ttl = grh->hop_limit;
1840 
1841 		qp->sgid_idx = grh->sgid_index;
1842 
1843 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1844 		if (rc) {
1845 			DP_ERR(dev,
1846 			       "modify qp: problems with GID index %d (rc=%d)\n",
1847 			       grh->sgid_index, rc);
1848 			return rc;
1849 		}
1850 
1851 		rc = qedr_get_dmac(dev, &attr->ah_attr,
1852 				   qp_params.remote_mac_addr);
1853 		if (rc)
1854 			return rc;
1855 
1856 		qp_params.use_local_mac = true;
1857 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
1858 
1859 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
1860 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
1861 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
1862 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
1863 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
1864 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
1865 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
1866 			 qp_params.remote_mac_addr);
1867 
1868 		qp_params.mtu = qp->mtu;
1869 		qp_params.lb_indication = false;
1870 	}
1871 
1872 	if (!qp_params.mtu) {
1873 		/* Stay with current MTU */
1874 		if (qp->mtu)
1875 			qp_params.mtu = qp->mtu;
1876 		else
1877 			qp_params.mtu =
1878 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1879 	}
1880 
1881 	if (attr_mask & IB_QP_TIMEOUT) {
1882 		SET_FIELD(qp_params.modify_flags,
1883 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
1884 
1885 		qp_params.ack_timeout = attr->timeout;
1886 		if (attr->timeout) {
1887 			u32 temp;
1888 
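			/* The IBTA Local ACK Timeout is 4.096 usec * 2^timeout
			 * (e.g. timeout=14 is roughly 67 msec); convert it to
			 * whole milliseconds for the FW below.
			 */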
1889 			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
1890 			/* FW requires [msec] */
1891 			qp_params.ack_timeout = temp;
1892 		} else {
1893 			/* Infinite */
1894 			qp_params.ack_timeout = 0;
1895 		}
1896 	}
1897 	if (attr_mask & IB_QP_RETRY_CNT) {
1898 		SET_FIELD(qp_params.modify_flags,
1899 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
1900 		qp_params.retry_cnt = attr->retry_cnt;
1901 	}
1902 
1903 	if (attr_mask & IB_QP_RNR_RETRY) {
1904 		SET_FIELD(qp_params.modify_flags,
1905 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
1906 		qp_params.rnr_retry_cnt = attr->rnr_retry;
1907 	}
1908 
1909 	if (attr_mask & IB_QP_RQ_PSN) {
1910 		SET_FIELD(qp_params.modify_flags,
1911 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
1912 		qp_params.rq_psn = attr->rq_psn;
1913 		qp->rq_psn = attr->rq_psn;
1914 	}
1915 
1916 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1917 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
1918 			rc = -EINVAL;
1919 			DP_ERR(dev,
1920 			       "unsupported max_rd_atomic=%d, supported=%d\n",
1921 			       attr->max_rd_atomic,
1922 			       dev->attr.max_qp_req_rd_atomic_resc);
1923 			goto err;
1924 		}
1925 
1926 		SET_FIELD(qp_params.modify_flags,
1927 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
1928 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
1929 	}
1930 
1931 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1932 		SET_FIELD(qp_params.modify_flags,
1933 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
1934 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
1935 	}
1936 
1937 	if (attr_mask & IB_QP_SQ_PSN) {
1938 		SET_FIELD(qp_params.modify_flags,
1939 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
1940 		qp_params.sq_psn = attr->sq_psn;
1941 		qp->sq_psn = attr->sq_psn;
1942 	}
1943 
1944 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1945 		if (attr->max_dest_rd_atomic >
1946 		    dev->attr.max_qp_resp_rd_atomic_resc) {
1947 			DP_ERR(dev,
1948 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
1949 			       attr->max_dest_rd_atomic,
1950 			       dev->attr.max_qp_resp_rd_atomic_resc);
1951 
1952 			rc = -EINVAL;
1953 			goto err;
1954 		}
1955 
1956 		SET_FIELD(qp_params.modify_flags,
1957 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
1958 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
1959 	}
1960 
1961 	if (attr_mask & IB_QP_DEST_QPN) {
1962 		SET_FIELD(qp_params.modify_flags,
1963 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
1964 
1965 		qp_params.dest_qp = attr->dest_qp_num;
1966 		qp->dest_qp_num = attr->dest_qp_num;
1967 	}
1968 
1969 	if (qp->qp_type != IB_QPT_GSI)
1970 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
1971 					      qp->qed_qp, &qp_params);
1972 
1973 	if (attr_mask & IB_QP_STATE) {
1974 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
1975 			rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
1976 		qp->state = qp_params.new_state;
1977 	}
1978 
1979 err:
1980 	return rc;
1981 }
1982 
1983 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
1984 {
1985 	int ib_qp_acc_flags = 0;
1986 
1987 	if (params->incoming_rdma_write_en)
1988 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1989 	if (params->incoming_rdma_read_en)
1990 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
1991 	if (params->incoming_atomic_en)
1992 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
1993 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1994 	return ib_qp_acc_flags;
1995 }
1996 
1997 int qedr_query_qp(struct ib_qp *ibqp,
1998 		  struct ib_qp_attr *qp_attr,
1999 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2000 {
2001 	struct qed_rdma_query_qp_out_params params;
2002 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2003 	struct qedr_dev *dev = qp->dev;
2004 	int rc = 0;
2005 
2006 	memset(&params, 0, sizeof(params));
2007 
2008 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2009 	if (rc)
2010 		goto err;
2011 
2012 	memset(qp_attr, 0, sizeof(*qp_attr));
2013 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2014 
2015 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2016 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2017 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2018 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2019 	qp_attr->rq_psn = params.rq_psn;
2020 	qp_attr->sq_psn = params.sq_psn;
2021 	qp_attr->dest_qp_num = params.dest_qp;
2022 
2023 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2024 
2025 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2026 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2027 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2028 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2029 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2030 	qp_init_attr->cap = qp_attr->cap;
2031 
2032 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2033 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2034 			params.flow_label, qp->sgid_idx,
2035 			params.hop_limit_ttl, params.traffic_class_tos);
2036 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2037 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2038 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2039 	qp_attr->timeout = params.timeout;
2040 	qp_attr->rnr_retry = params.rnr_retry;
2041 	qp_attr->retry_cnt = params.retry_cnt;
2042 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2043 	qp_attr->pkey_index = params.pkey_index;
2044 	qp_attr->port_num = 1;
2045 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2046 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2047 	qp_attr->alt_pkey_index = 0;
2048 	qp_attr->alt_port_num = 0;
2049 	qp_attr->alt_timeout = 0;
2050 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2051 
2052 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2053 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2054 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2055 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2056 
2057 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2058 		 qp_attr->cap.max_inline_data);
2059 
2060 err:
2061 	return rc;
2062 }
2063 
2064 int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2065 {
2066 	int rc = 0;
2067 
2068 	if (qp->qp_type != IB_QPT_GSI) {
2069 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2070 		if (rc)
2071 			return rc;
2072 	}
2073 
2074 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2075 		qedr_cleanup_user(dev, qp);
2076 	else
2077 		qedr_cleanup_kernel(dev, qp);
2078 
2079 	return 0;
2080 }
2081 
2082 int qedr_destroy_qp(struct ib_qp *ibqp)
2083 {
2084 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2085 	struct qedr_dev *dev = qp->dev;
2086 	struct ib_qp_attr attr;
2087 	int attr_mask = 0;
2088 	int rc = 0;
2089 
2090 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2091 		 qp, qp->qp_type);
2092 
2093 	if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2094 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2095 	    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2096 
2097 		attr.qp_state = IB_QPS_ERR;
2098 		attr_mask |= IB_QP_STATE;
2099 
2100 		/* Change the QP state to ERROR */
2101 		qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2102 	}
2103 
2104 	if (qp->qp_type == IB_QPT_GSI)
2105 		qedr_destroy_gsi_qp(dev);
2106 
2107 	qedr_free_qp_resources(dev, qp);
2108 
2109 	kfree(qp);
2110 
2111 	return rc;
2112 }
2113 
2114 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2115 			     struct ib_udata *udata)
2116 {
2117 	struct qedr_ah *ah;
2118 
2119 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2120 	if (!ah)
2121 		return ERR_PTR(-ENOMEM);
2122 
2123 	ah->attr = *attr;
2124 
2125 	return &ah->ibah;
2126 }
2127 
2128 int qedr_destroy_ah(struct ib_ah *ibah)
2129 {
2130 	struct qedr_ah *ah = get_qedr_ah(ibah);
2131 
2132 	kfree(ah);
2133 	return 0;
2134 }
2135 
2136 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2137 {
2138 	struct qedr_pbl *pbl, *tmp;
2139 
2140 	if (info->pbl_table)
2141 		list_add_tail(&info->pbl_table->list_entry,
2142 			      &info->free_pbl_list);
2143 
2144 	if (!list_empty(&info->inuse_pbl_list))
2145 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2146 
2147 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2148 		list_del(&pbl->list_entry);
2149 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2150 	}
2151 }
2152 
2153 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2154 			size_t page_list_len, bool two_layered)
2155 {
2156 	struct qedr_pbl *tmp;
2157 	int rc;
2158 
2159 	INIT_LIST_HEAD(&info->free_pbl_list);
2160 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2161 
2162 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2163 				  page_list_len, two_layered);
2164 	if (rc)
2165 		goto done;
2166 
2167 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2168 	if (IS_ERR(info->pbl_table)) {
2169 		rc = PTR_ERR(info->pbl_table);
2170 		goto done;
2171 	}
2172 
2173 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2174 		 &info->pbl_table->pa);
2175 
2176 	/* In the usual case two PBLs are needed, so allocate an extra one
2177 	 * up front and keep it on the free list.
2178 	 */
2179 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2180 	if (IS_ERR(tmp)) {
2181 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2182 		goto done;
2183 	}
2184 
2185 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2186 
2187 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2188 
2189 done:
2190 	if (rc)
2191 		free_mr_info(dev, info);
2192 
2193 	return rc;
2194 }
2195 
2196 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2197 			       u64 usr_addr, int acc, struct ib_udata *udata)
2198 {
2199 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2200 	struct qedr_mr *mr;
2201 	struct qedr_pd *pd;
2202 	int rc = -ENOMEM;
2203 
2204 	pd = get_qedr_pd(ibpd);
2205 	DP_DEBUG(dev, QEDR_MSG_MR,
2206 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2207 		 pd->pd_id, start, len, usr_addr, acc);
2208 
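	/* Per the IB spec, remote write access requires local write access
	 * to also be set.
	 */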
2209 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2210 		return ERR_PTR(-EINVAL);
2211 
2212 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2213 	if (!mr)
2214 		return ERR_PTR(rc);
2215 
2216 	mr->type = QEDR_MR_USER;
2217 
2218 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2219 	if (IS_ERR(mr->umem)) {
2220 		rc = -EFAULT;
2221 		goto err0;
2222 	}
2223 
2224 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2225 	if (rc)
2226 		goto err1;
2227 
2228 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2229 			   &mr->info.pbl_info);
2230 
2231 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2232 	if (rc) {
2233 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2234 		goto err1;
2235 	}
2236 
2237 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2238 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2239 	mr->hw_mr.key = 0;
2240 	mr->hw_mr.pd = pd->pd_id;
2241 	mr->hw_mr.local_read = 1;
2242 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2243 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2244 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2245 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2246 	mr->hw_mr.mw_bind = false;
2247 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2248 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2249 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2250 	mr->hw_mr.page_size_log = mr->umem->page_shift;
2251 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2252 	mr->hw_mr.length = len;
2253 	mr->hw_mr.vaddr = usr_addr;
2254 	mr->hw_mr.zbva = false;
2255 	mr->hw_mr.phy_mr = false;
2256 	mr->hw_mr.dma_mr = false;
2257 
2258 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2259 	if (rc) {
2260 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2261 		goto err2;
2262 	}
2263 
2264 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2265 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2266 	    mr->hw_mr.remote_atomic)
2267 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2268 
2269 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2270 		 mr->ibmr.lkey);
2271 	return &mr->ibmr;
2272 
2273 err2:
2274 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2275 err1:
2276 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2277 err0:
2278 	kfree(mr);
2279 	return ERR_PTR(rc);
2280 }
2281 
2282 int qedr_dereg_mr(struct ib_mr *ib_mr)
2283 {
2284 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2285 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2286 	int rc = 0;
2287 
2288 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2289 	if (rc)
2290 		return rc;
2291 
2292 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2293 
2294 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2295 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2296 
2297 	/* it could be user registered memory. */
2298 	if (mr->umem)
2299 		ib_umem_release(mr->umem);
2300 
2301 	kfree(mr);
2302 
2303 	return rc;
2304 }
2305 
2306 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2307 				       int max_page_list_len)
2308 {
2309 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2310 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2311 	struct qedr_mr *mr;
2312 	int rc = -ENOMEM;
2313 
2314 	DP_DEBUG(dev, QEDR_MSG_MR,
2315 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2316 		 max_page_list_len);
2317 
2318 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2319 	if (!mr)
2320 		return ERR_PTR(rc);
2321 
2322 	mr->dev = dev;
2323 	mr->type = QEDR_MR_FRMR;
2324 
2325 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2326 	if (rc)
2327 		goto err0;
2328 
2329 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2330 	if (rc) {
2331 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2332 		goto err0;
2333 	}
2334 
2335 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2336 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2337 	mr->hw_mr.key = 0;
2338 	mr->hw_mr.pd = pd->pd_id;
2339 	mr->hw_mr.local_read = 1;
2340 	mr->hw_mr.local_write = 0;
2341 	mr->hw_mr.remote_read = 0;
2342 	mr->hw_mr.remote_write = 0;
2343 	mr->hw_mr.remote_atomic = 0;
2344 	mr->hw_mr.mw_bind = false;
2345 	mr->hw_mr.pbl_ptr = 0;
2346 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2347 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2348 	mr->hw_mr.fbo = 0;
2349 	mr->hw_mr.length = 0;
2350 	mr->hw_mr.vaddr = 0;
2351 	mr->hw_mr.zbva = false;
2352 	mr->hw_mr.phy_mr = true;
2353 	mr->hw_mr.dma_mr = false;
2354 
2355 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2356 	if (rc) {
2357 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2358 		goto err1;
2359 	}
2360 
2361 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2362 	mr->ibmr.rkey = mr->ibmr.lkey;
2363 
2364 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2365 	return mr;
2366 
2367 err1:
2368 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2369 err0:
2370 	kfree(mr);
2371 	return ERR_PTR(rc);
2372 }
2373 
2374 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2375 			    enum ib_mr_type mr_type, u32 max_num_sg)
2376 {
2377 	struct qedr_dev *dev;
2378 	struct qedr_mr *mr;
2379 
2380 	if (mr_type != IB_MR_TYPE_MEM_REG)
2381 		return ERR_PTR(-EINVAL);
2382 
2383 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2384 
2385 	if (IS_ERR(mr))
2386 		return ERR_PTR(-EINVAL);
2387 
2388 	dev = mr->dev;
2389 
2390 	return &mr->ibmr;
2391 }
2392 
2393 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2394 {
2395 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2396 	struct qedr_pbl *pbl_table;
2397 	struct regpair *pbe;
2398 	u32 pbes_in_page;
2399 
2400 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2401 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2402 		return -ENOMEM;
2403 	}
2404 
2405 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2406 		 mr->npages, addr);
2407 
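	/* Locate the PBL page that holds this entry and the slot within it;
	 * each PBL page holds pbl_size / sizeof(u64) page-buffer entries.
	 */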
2408 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2409 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2410 	pbe = (struct regpair *)pbl_table->va;
2411 	pbe +=  mr->npages % pbes_in_page;
2412 	pbe->lo = cpu_to_le32((u32)addr);
2413 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2414 
2415 	mr->npages++;
2416 
2417 	return 0;
2418 }
2419 
2420 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2421 {
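	/* work is the number of completed-but-unhandled FMRs whose PBLs can
	 * be recycled; note that one completion is always held back (the
	 * "- 1" below).
	 */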
2422 	int work = info->completed - info->completed_handled - 1;
2423 
2424 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2425 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2426 		struct qedr_pbl *pbl;
2427 
2428 		/* Free all the page lists that can be freed (i.e. all the
2429 		 * ones that were invalidated), under the assumption that if
2430 		 * an FMR completed successfully, any invalidate operation
2431 		 * issued before it has also completed.
2432 		 */
2433 		pbl = list_first_entry(&info->inuse_pbl_list,
2434 				       struct qedr_pbl, list_entry);
2435 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2436 		info->completed_handled++;
2437 	}
2438 }
2439 
2440 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2441 		   int sg_nents, unsigned int *sg_offset)
2442 {
2443 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2444 
2445 	mr->npages = 0;
2446 
2447 	handle_completed_mrs(mr->dev, &mr->info);
2448 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2449 }
2450 
2451 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2452 {
2453 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2454 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2455 	struct qedr_mr *mr;
2456 	int rc;
2457 
2458 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2459 	if (!mr)
2460 		return ERR_PTR(-ENOMEM);
2461 
2462 	mr->type = QEDR_MR_DMA;
2463 
2464 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2465 	if (rc) {
2466 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2467 		goto err1;
2468 	}
2469 
2470 	/* index only, 18 bit long, lkey = itid << 8 | key */
2471 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2472 	mr->hw_mr.pd = pd->pd_id;
2473 	mr->hw_mr.local_read = 1;
2474 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2475 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2476 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2477 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2478 	mr->hw_mr.dma_mr = true;
2479 
2480 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2481 	if (rc) {
2482 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2483 		goto err2;
2484 	}
2485 
2486 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2487 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2488 	    mr->hw_mr.remote_atomic)
2489 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2490 
2491 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2492 	return &mr->ibmr;
2493 
2494 err2:
2495 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2496 err1:
2497 	kfree(mr);
2498 	return ERR_PTR(rc);
2499 }
2500 
2501 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2502 {
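	/* The SQ is full when advancing the producer would collide with the
	 * consumer (one ring slot is always kept open).
	 */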
2503 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2504 }
2505 
2506 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2507 {
2508 	int i, len = 0;
2509 
2510 	for (i = 0; i < num_sge; i++)
2511 		len += sg_list[i].length;
2512 
2513 	return len;
2514 }
2515 
2516 static void swap_wqe_data64(u64 *p)
2517 {
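	/* Byte-swap every 64-bit word of the inline-data segment;
	 * cpu_to_be64(cpu_to_le64(x)) reverses the byte order regardless of
	 * host endianness, which is presumably the layout the FW expects.
	 */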
2518 	int i;
2519 
2520 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2521 		*p = cpu_to_be64(cpu_to_le64(*p));
2522 }
2523 
2524 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2525 				       struct qedr_qp *qp, u8 *wqe_size,
2526 				       struct ib_send_wr *wr,
2527 				       struct ib_send_wr **bad_wr, u8 *bits,
2528 				       u8 bit)
2529 {
2530 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2531 	char *seg_prt, *wqe;
2532 	int i, seg_siz;
2533 
2534 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2535 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2536 		*bad_wr = wr;
2537 		return 0;
2538 	}
2539 
2540 	if (!data_size)
2541 		return data_size;
2542 
2543 	*bits |= bit;
2544 
2545 	seg_prt = NULL;
2546 	wqe = NULL;
2547 	seg_siz = 0;
2548 
2549 	/* Copy data inline */
2550 	for (i = 0; i < wr->num_sge; i++) {
2551 		u32 len = wr->sg_list[i].length;
2552 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2553 
2554 		while (len > 0) {
2555 			u32 cur;
2556 
2557 			/* New segment required */
2558 			if (!seg_siz) {
2559 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2560 				seg_prt = wqe;
2561 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2562 				(*wqe_size)++;
2563 			}
2564 
2565 			/* Calculate currently allowed length */
2566 			cur = min_t(u32, len, seg_siz);
2567 			memcpy(seg_prt, src, cur);
2568 
2569 			/* Update segment variables */
2570 			seg_prt += cur;
2571 			seg_siz -= cur;
2572 
2573 			/* Update sge variables */
2574 			src += cur;
2575 			len -= cur;
2576 
2577 			/* Swap fully-completed segments */
2578 			if (!seg_siz)
2579 				swap_wqe_data64((u64 *)wqe);
2580 		}
2581 	}
2582 
2583 	/* swap last not completed segment */
2584 	if (seg_siz)
2585 		swap_wqe_data64((u64 *)wqe);
2586 
2587 	return data_size;
2588 }
2589 
2590 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
2591 	do {							\
2592 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2593 		(sge)->length = cpu_to_le32(vlength);		\
2594 		(sge)->flags = cpu_to_le32(vflags);		\
2595 	} while (0)
2596 
2597 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
2598 	do {							\
2599 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
2600 		(hdr)->num_sges = num_sge;			\
2601 	} while (0)
2602 
2603 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
2604 	do {							\
2605 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2606 		(sge)->length = cpu_to_le32(vlength);		\
2607 		(sge)->l_key = cpu_to_le32(vlkey);		\
2608 	} while (0)
2609 
2610 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2611 				struct ib_send_wr *wr)
2612 {
2613 	u32 data_size = 0;
2614 	int i;
2615 
2616 	for (i = 0; i < wr->num_sge; i++) {
2617 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2618 
2619 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2620 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2621 		sge->length = cpu_to_le32(wr->sg_list[i].length);
2622 		data_size += wr->sg_list[i].length;
2623 	}
2624 
2625 	if (wqe_size)
2626 		*wqe_size += wr->num_sge;
2627 
2628 	return data_size;
2629 }
2630 
2631 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2632 				     struct qedr_qp *qp,
2633 				     struct rdma_sq_rdma_wqe_1st *rwqe,
2634 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2635 				     struct ib_send_wr *wr,
2636 				     struct ib_send_wr **bad_wr)
2637 {
2638 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2639 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2640 
2641 	if (wr->send_flags & IB_SEND_INLINE &&
2642 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2643 	     wr->opcode == IB_WR_RDMA_WRITE)) {
2644 		u8 flags = 0;
2645 
2646 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2647 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2648 						   bad_wr, &rwqe->flags, flags);
2649 	}
2650 
2651 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2652 }
2653 
2654 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2655 				     struct qedr_qp *qp,
2656 				     struct rdma_sq_send_wqe_1st *swqe,
2657 				     struct rdma_sq_send_wqe_2st *swqe2,
2658 				     struct ib_send_wr *wr,
2659 				     struct ib_send_wr **bad_wr)
2660 {
2661 	memset(swqe2, 0, sizeof(*swqe2));
2662 	if (wr->send_flags & IB_SEND_INLINE) {
2663 		u8 flags = 0;
2664 
2665 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2666 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2667 						   bad_wr, &swqe->flags, flags);
2668 	}
2669 
2670 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2671 }
2672 
2673 static int qedr_prepare_reg(struct qedr_qp *qp,
2674 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2675 			    struct ib_reg_wr *wr)
2676 {
2677 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2678 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2679 
2680 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2681 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2682 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2683 	fwqe1->l_key = wr->key;
2684 
2685 	fwqe2->access_ctrl = 0;
2686 
2687 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2688 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2689 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2690 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2691 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2692 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2693 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2694 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2695 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2696 	fwqe2->fmr_ctrl = 0;
2697 
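	/* The FW page-size field is log2 of the MR page size, expressed
	 * relative to 4K (2^12).
	 */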
2698 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2699 		   ilog2(mr->ibmr.page_size) - 12);
2700 
2701 	fwqe2->length_hi = 0;
2702 	fwqe2->length_lo = mr->ibmr.length;
2703 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2704 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2705 
2706 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2707 
2708 	return 0;
2709 }
2710 
2711 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2712 {
2713 	switch (opcode) {
2714 	case IB_WR_RDMA_WRITE:
2715 	case IB_WR_RDMA_WRITE_WITH_IMM:
2716 		return IB_WC_RDMA_WRITE;
2717 	case IB_WR_SEND_WITH_IMM:
2718 	case IB_WR_SEND:
2719 	case IB_WR_SEND_WITH_INV:
2720 		return IB_WC_SEND;
2721 	case IB_WR_RDMA_READ:
2722 		return IB_WC_RDMA_READ;
2723 	case IB_WR_ATOMIC_CMP_AND_SWP:
2724 		return IB_WC_COMP_SWAP;
2725 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2726 		return IB_WC_FETCH_ADD;
2727 	case IB_WR_REG_MR:
2728 		return IB_WC_REG_MR;
2729 	case IB_WR_LOCAL_INV:
2730 		return IB_WC_LOCAL_INV;
2731 	default:
2732 		return IB_WC_SEND;
2733 	}
2734 }
2735 
2736 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2737 {
2738 	int wq_is_full, err_wr, pbl_is_full;
2739 	struct qedr_dev *dev = qp->dev;
2740 
2741 	/* prevent SQ overflow and/or processing of a bad WR */
2742 	err_wr = wr->num_sge > qp->sq.max_sges;
2743 	wq_is_full = qedr_wq_is_full(&qp->sq);
2744 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2745 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2746 	if (wq_is_full || err_wr || pbl_is_full) {
2747 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2748 			DP_ERR(dev,
2749 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2750 			       qp);
2751 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2752 		}
2753 
2754 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2755 			DP_ERR(dev,
2756 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2757 			       qp);
2758 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2759 		}
2760 
2761 		if (pbl_is_full &&
2762 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2763 			DP_ERR(dev,
2764 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2765 			       qp);
2766 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2767 		}
2768 		return false;
2769 	}
2770 	return true;
2771 }
2772 
2773 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2774 		     struct ib_send_wr **bad_wr)
2775 {
2776 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2777 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2778 	struct rdma_sq_atomic_wqe_1st *awqe1;
2779 	struct rdma_sq_atomic_wqe_2nd *awqe2;
2780 	struct rdma_sq_atomic_wqe_3rd *awqe3;
2781 	struct rdma_sq_send_wqe_2st *swqe2;
2782 	struct rdma_sq_local_inv_wqe *iwqe;
2783 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2784 	struct rdma_sq_send_wqe_1st *swqe;
2785 	struct rdma_sq_rdma_wqe_1st *rwqe;
2786 	struct rdma_sq_fmr_wqe_1st *fwqe1;
2787 	struct rdma_sq_common_wqe *wqe;
2788 	u32 length;
2789 	int rc = 0;
2790 	bool comp;
2791 
2792 	if (!qedr_can_post_send(qp, wr)) {
2793 		*bad_wr = wr;
2794 		return -ENOMEM;
2795 	}
2796 
2797 	wqe = qed_chain_produce(&qp->sq.pbl);
2798 	qp->wqe_wr_id[qp->sq.prod].signaled =
2799 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2800 
2801 	wqe->flags = 0;
2802 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2803 		   !!(wr->send_flags & IB_SEND_SOLICITED));
2804 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2805 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2806 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2807 		   !!(wr->send_flags & IB_SEND_FENCE));
2808 	wqe->prev_wqe_size = qp->prev_wqe_size;
2809 
2810 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2811 
2812 	switch (wr->opcode) {
2813 	case IB_WR_SEND_WITH_IMM:
2814 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
2815 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2816 		swqe->wqe_size = 2;
2817 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2818 
2819 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
2820 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2821 						   wr, bad_wr);
2822 		swqe->length = cpu_to_le32(length);
2823 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2824 		qp->prev_wqe_size = swqe->wqe_size;
2825 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2826 		break;
2827 	case IB_WR_SEND:
2828 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
2829 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2830 
2831 		swqe->wqe_size = 2;
2832 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2833 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2834 						   wr, bad_wr);
2835 		swqe->length = cpu_to_le32(length);
2836 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2837 		qp->prev_wqe_size = swqe->wqe_size;
2838 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2839 		break;
2840 	case IB_WR_SEND_WITH_INV:
2841 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
2842 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2843 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2844 		swqe->wqe_size = 2;
2845 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
2846 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2847 						   wr, bad_wr);
2848 		swqe->length = cpu_to_le32(length);
2849 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2850 		qp->prev_wqe_size = swqe->wqe_size;
2851 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2852 		break;
2853 
2854 	case IB_WR_RDMA_WRITE_WITH_IMM:
2855 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
2856 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2857 
2858 		rwqe->wqe_size = 2;
2859 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
2860 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2861 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2862 						   wr, bad_wr);
2863 		rwqe->length = cpu_to_le32(length);
2864 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2865 		qp->prev_wqe_size = rwqe->wqe_size;
2866 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2867 		break;
2868 	case IB_WR_RDMA_WRITE:
2869 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
2870 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2871 
2872 		rwqe->wqe_size = 2;
2873 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2874 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2875 						   wr, bad_wr);
2876 		rwqe->length = cpu_to_le32(length);
2877 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2878 		qp->prev_wqe_size = rwqe->wqe_size;
2879 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2880 		break;
2881 	case IB_WR_RDMA_READ_WITH_INV:
2882 		DP_ERR(dev,
2883 		       "RDMA READ WITH INVALIDATE not supported\n");
2884 		*bad_wr = wr;
2885 		rc = -EINVAL;
2886 		break;
2887 
2888 	case IB_WR_RDMA_READ:
2889 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
2890 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2891 
2892 		rwqe->wqe_size = 2;
2893 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2894 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2895 						   wr, bad_wr);
2896 		rwqe->length = cpu_to_le32(length);
2897 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2898 		qp->prev_wqe_size = rwqe->wqe_size;
2899 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2900 		break;
2901 
2902 	case IB_WR_ATOMIC_CMP_AND_SWP:
2903 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2904 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
2905 		awqe1->wqe_size = 4;
2906 
2907 		awqe2 = qed_chain_produce(&qp->sq.pbl);
2908 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
2909 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
2910 
2911 		awqe3 = qed_chain_produce(&qp->sq.pbl);
2912 
2913 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2914 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
2915 			DMA_REGPAIR_LE(awqe3->swap_data,
2916 				       atomic_wr(wr)->compare_add);
2917 		} else {
2918 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
2919 			DMA_REGPAIR_LE(awqe3->swap_data,
2920 				       atomic_wr(wr)->swap);
2921 			DMA_REGPAIR_LE(awqe3->cmp_data,
2922 				       atomic_wr(wr)->compare_add);
2923 		}
2924 
2925 		qedr_prepare_sq_sges(qp, NULL, wr);
2926 
2927 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
2928 		qp->prev_wqe_size = awqe1->wqe_size;
2929 		break;
2930 
2931 	case IB_WR_LOCAL_INV:
2932 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
2933 		iwqe->wqe_size = 1;
2934 
2935 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
2936 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
2937 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
2938 		qp->prev_wqe_size = iwqe->wqe_size;
2939 		break;
2940 	case IB_WR_REG_MR:
2941 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
2942 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
2943 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
2944 		fwqe1->wqe_size = 2;
2945 
2946 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
2947 		if (rc) {
2948 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
2949 			*bad_wr = wr;
2950 			break;
2951 		}
2952 
2953 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
2954 		qp->prev_wqe_size = fwqe1->wqe_size;
2955 		break;
2956 	default:
2957 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
2958 		rc = -EINVAL;
2959 		*bad_wr = wr;
2960 		break;
2961 	}
2962 
2963 	if (*bad_wr) {
2964 		u16 value;
2965 
2966 		/* Restore prod to its position before
2967 		 * this WR was processed
2968 		 */
2969 		value = le16_to_cpu(qp->sq.db_data.data.value);
2970 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
2971 
2972 		/* Restore prev_wqe_size */
2973 		qp->prev_wqe_size = wqe->prev_wqe_size;
2974 		rc = -EINVAL;
2975 		DP_ERR(dev, "POST SEND FAILED\n");
2976 	}
2977 
2978 	return rc;
2979 }
2980 
2981 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2982 		   struct ib_send_wr **bad_wr)
2983 {
2984 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2985 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2986 	unsigned long flags;
2987 	int rc = 0;
2988 
2989 	*bad_wr = NULL;
2990 
2991 	if (qp->qp_type == IB_QPT_GSI)
2992 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
2993 
2994 	spin_lock_irqsave(&qp->q_lock, flags);
2995 
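	/* Posting is allowed in RTS and SQD; it is also allowed in ERR so
	 * that posted WRs can be completed with a flush error.
	 */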
2996 	if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
2997 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2998 	    (qp->state != QED_ROCE_QP_STATE_SQD)) {
2999 		spin_unlock_irqrestore(&qp->q_lock, flags);
3000 		*bad_wr = wr;
3001 		DP_DEBUG(dev, QEDR_MSG_CQ,
3002 			 "QP in wrong state! QP icid=0x%x state %d\n",
3003 			 qp->icid, qp->state);
3004 		return -EINVAL;
3005 	}
3006 
3007 	while (wr) {
3008 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3009 		if (rc)
3010 			break;
3011 
3012 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3013 
3014 		qedr_inc_sw_prod(&qp->sq);
3015 
3016 		qp->sq.db_data.data.value++;
3017 
3018 		wr = wr->next;
3019 	}
3020 
3021 	/* Trigger the doorbell.
3022 	 * If the very first WR failed, the doorbell is rung in vain; this is
3023 	 * harmless as long as the producer value is unchanged. For
3024 	 * performance reasons we avoid checking for this redundant
3025 	 * doorbell.
3026 	 */
3027 	wmb();
3028 	writel(qp->sq.db_data.raw, qp->sq.db);
3029 
3030 	/* Make sure write sticks */
3031 	mmiowb();
3032 
3033 	spin_unlock_irqrestore(&qp->q_lock, flags);
3034 
3035 	return rc;
3036 }
3037 
3038 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3039 		   struct ib_recv_wr **bad_wr)
3040 {
3041 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3042 	struct qedr_dev *dev = qp->dev;
3043 	unsigned long flags;
3044 	int status = 0;
3045 
3046 	if (qp->qp_type == IB_QPT_GSI)
3047 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3048 
3049 	spin_lock_irqsave(&qp->q_lock, flags);
3050 
3051 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3052 		spin_unlock_irqrestore(&qp->q_lock, flags);
3053 		*bad_wr = wr;
3054 		return -EINVAL;
3055 	}
3056 
3057 	while (wr) {
3058 		int i;
3059 
3060 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3061 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3062 		    wr->num_sge > qp->rq.max_sges) {
3063 			DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
3064 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3065 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3066 			       qp->rq.max_sges);
3067 			status = -ENOMEM;
3068 			*bad_wr = wr;
3069 			break;
3070 		}
3071 		for (i = 0; i < wr->num_sge; i++) {
3072 			u32 flags = 0;
3073 			struct rdma_rq_sge *rqe =
3074 			    qed_chain_produce(&qp->rq.pbl);
3075 
3076 			/* The first SGE must include the number
3077 			 * of SGEs in the list
3078 			 */
3079 			if (!i)
3080 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3081 					  wr->num_sge);
3082 
3083 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3084 				  wr->sg_list[i].lkey);
3085 
3086 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3087 				   wr->sg_list[i].length, flags);
3088 		}
3089 
3090 		/* Special case of no SGEs. The FW requires between 1 and 4
3091 		 * SGEs, so post a single SGE with length zero. This is needed
3092 		 * because an RDMA write with immediate consumes an RQ entry.
3093 		 */
3094 		if (!wr->num_sge) {
3095 			u32 flags = 0;
3096 			struct rdma_rq_sge *rqe =
3097 			    qed_chain_produce(&qp->rq.pbl);
3098 
3099 			/* The first (and only) SGE must include the number
3100 			 * of SGEs in the list
3101 			 */
3102 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3103 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3104 
3105 			RQ_SGE_SET(rqe, 0, 0, flags);
3106 			i = 1;
3107 		}
3108 
3109 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3110 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3111 
3112 		qedr_inc_sw_prod(&qp->rq);
3113 
3114 		/* Flush all the writes before signalling doorbell */
3115 		wmb();
3116 
3117 		qp->rq.db_data.data.value++;
3118 
3119 		writel(qp->rq.db_data.raw, qp->rq.db);
3120 
3121 		/* Make sure write sticks */
3122 		mmiowb();
3123 
3124 		wr = wr->next;
3125 	}
3126 
3127 	spin_unlock_irqrestore(&qp->q_lock, flags);
3128 
3129 	return status;
3130 }
3131 
3132 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3133 {
3134 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3135 
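	/* A CQE still needs processing when its toggle bit matches the CQ's
	 * current toggle value for this pass over the ring.
	 */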
3136 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3137 		cq->pbl_toggle;
3138 }
3139 
3140 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3141 {
3142 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3143 	struct qedr_qp *qp;
3144 
3145 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3146 						   resp_cqe->qp_handle.lo,
3147 						   u64);
3148 	return qp;
3149 }
3150 
3151 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3152 {
3153 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3154 
3155 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3156 }
3157 
3158 /* Return latest CQE (needs processing) */
3159 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3160 {
3161 	return cq->latest_cqe;
3162 }
3163 
3164 /* For FMR we need to increment the completed counter used by the FMR
3165  * algorithm that decides whether a PBL can be freed or not. This must be
3166  * done whether or not the work request was signaled, which is why this
3167  * function is called from the condition that checks if a WR should be
3168  * skipped - to make sure we do not miss it (this FMR operation may not
3169  * have been signaled).
3170  */
3171 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3172 {
3173 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3174 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3175 }
3176 
3177 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3178 		       struct qedr_cq *cq, int num_entries,
3179 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3180 		       int force)
3181 {
3182 	u16 cnt = 0;
3183 
3184 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3185 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3186 			qedr_chk_if_fmr(qp);
3187 			/* skip WC */
3188 			goto next_cqe;
3189 		}
3190 
3191 		/* fill WC */
3192 		wc->status = status;
3193 		wc->vendor_err = 0;
3194 		wc->wc_flags = 0;
3195 		wc->src_qp = qp->id;
3196 		wc->qp = &qp->ibqp;
3197 
3198 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3199 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3200 
3201 		switch (wc->opcode) {
3202 		case IB_WC_RDMA_WRITE:
3203 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3204 			break;
3205 		case IB_WC_COMP_SWAP:
3206 		case IB_WC_FETCH_ADD:
3207 			wc->byte_len = 8;
3208 			break;
3209 		case IB_WC_REG_MR:
3210 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3211 			break;
3212 		default:
3213 			break;
3214 		}
3215 
3216 		num_entries--;
3217 		wc++;
3218 		cnt++;
3219 next_cqe:
3220 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3221 			qed_chain_consume(&qp->sq.pbl);
3222 		qedr_inc_sw_cons(&qp->sq);
3223 	}
3224 
3225 	return cnt;
3226 }
3227 
3228 static int qedr_poll_cq_req(struct qedr_dev *dev,
3229 			    struct qedr_qp *qp, struct qedr_cq *cq,
3230 			    int num_entries, struct ib_wc *wc,
3231 			    struct rdma_cqe_requester *req)
3232 {
3233 	int cnt = 0;
3234 
3235 	switch (req->status) {
3236 	case RDMA_CQE_REQ_STS_OK:
3237 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3238 				  IB_WC_SUCCESS, 0);
3239 		break;
3240 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3241 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3242 			DP_ERR(dev,
3243 			       "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3244 			       cq->icid, qp->icid);
3245 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3246 				  IB_WC_WR_FLUSH_ERR, 1);
3247 		break;
3248 	default:
3249 		/* process all WQEs before the consumer */
3250 		qp->state = QED_ROCE_QP_STATE_ERR;
3251 		cnt = process_req(dev, qp, cq, num_entries, wc,
3252 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3253 		wc += cnt;
3254 		/* if we have extra WC fill it with actual error info */
3255 		if (cnt < num_entries) {
3256 			enum ib_wc_status wc_status;
3257 
3258 			switch (req->status) {
3259 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3260 				DP_ERR(dev,
3261 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3262 				       cq->icid, qp->icid);
3263 				wc_status = IB_WC_BAD_RESP_ERR;
3264 				break;
3265 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3266 				DP_ERR(dev,
3267 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3268 				       cq->icid, qp->icid);
3269 				wc_status = IB_WC_LOC_LEN_ERR;
3270 				break;
3271 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3272 				DP_ERR(dev,
3273 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3274 				       cq->icid, qp->icid);
3275 				wc_status = IB_WC_LOC_QP_OP_ERR;
3276 				break;
3277 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3278 				DP_ERR(dev,
3279 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3280 				       cq->icid, qp->icid);
3281 				wc_status = IB_WC_LOC_PROT_ERR;
3282 				break;
3283 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3284 				DP_ERR(dev,
3285 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3286 				       cq->icid, qp->icid);
3287 				wc_status = IB_WC_MW_BIND_ERR;
3288 				break;
3289 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3290 				DP_ERR(dev,
3291 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3292 				       cq->icid, qp->icid);
3293 				wc_status = IB_WC_REM_INV_REQ_ERR;
3294 				break;
3295 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3296 				DP_ERR(dev,
3297 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3298 				       cq->icid, qp->icid);
3299 				wc_status = IB_WC_REM_ACCESS_ERR;
3300 				break;
3301 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3302 				DP_ERR(dev,
3303 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3304 				       cq->icid, qp->icid);
3305 				wc_status = IB_WC_REM_OP_ERR;
3306 				break;
3307 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3308 				DP_ERR(dev,
3309 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3310 				       cq->icid, qp->icid);
3311 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3312 				break;
3313 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3314 				DP_ERR(dev,
3315 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3316 				       cq->icid, qp->icid);
3317 				wc_status = IB_WC_RETRY_EXC_ERR;
3318 				break;
3319 			default:
3320 				DP_ERR(dev,
3321 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3322 				       cq->icid, qp->icid);
3323 				wc_status = IB_WC_GENERAL_ERR;
3324 			}
3325 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3326 					   wc_status, 1);
3327 		}
3328 	}
3329 
3330 	return cnt;
3331 }
3332 
3333 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3334 {
3335 	switch (status) {
3336 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3337 		return IB_WC_LOC_ACCESS_ERR;
3338 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3339 		return IB_WC_LOC_LEN_ERR;
3340 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3341 		return IB_WC_LOC_QP_OP_ERR;
3342 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3343 		return IB_WC_LOC_PROT_ERR;
3344 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3345 		return IB_WC_MW_BIND_ERR;
3346 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3347 		return IB_WC_REM_INV_RD_REQ_ERR;
3348 	case RDMA_CQE_RESP_STS_OK:
3349 		return IB_WC_SUCCESS;
3350 	default:
3351 		return IB_WC_GENERAL_ERR;
3352 	}
3353 }
3354 
3355 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3356 					  struct ib_wc *wc)
3357 {
3358 	wc->status = IB_WC_SUCCESS;
3359 	wc->byte_len = le32_to_cpu(resp->length);
3360 
3361 	if (resp->flags & QEDR_RESP_IMM) {
3362 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3363 		wc->wc_flags |= IB_WC_WITH_IMM;
3364 
3365 		if (resp->flags & QEDR_RESP_RDMA)
3366 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3367 
3368 		if (resp->flags & QEDR_RESP_INV)
3369 			return -EINVAL;
3370 
3371 	} else if (resp->flags & QEDR_RESP_INV) {
3372 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3373 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3374 
3375 		if (resp->flags & QEDR_RESP_RDMA)
3376 			return -EINVAL;
3377 
3378 	} else if (resp->flags & QEDR_RESP_RDMA) {
3379 		return -EINVAL;
3380 	}
3381 
3382 	return 0;
3383 }
3384 
3385 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3386 			       struct qedr_cq *cq, struct ib_wc *wc,
3387 			       struct rdma_cqe_responder *resp, u64 wr_id)
3388 {
3389 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3390 	wc->opcode = IB_WC_RECV;
3391 	wc->wc_flags = 0;
3392 
3393 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3394 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3395 			DP_ERR(dev,
3396 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3397 			       cq, cq->icid, resp->flags);
3398 
3399 	} else {
3400 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3401 		if (wc->status == IB_WC_GENERAL_ERR)
3402 			DP_ERR(dev,
3403 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3404 			       cq, cq->icid, resp->status);
3405 	}
3406 
3407 	/* Fill the rest of the WC */
3408 	wc->vendor_err = 0;
3409 	wc->src_qp = qp->id;
3410 	wc->qp = &qp->ibqp;
3411 	wc->wr_id = wr_id;
3412 }
3413 
3414 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3415 			    struct qedr_cq *cq, struct ib_wc *wc,
3416 			    struct rdma_cqe_responder *resp)
3417 {
3418 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3419 
3420 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3421 
3422 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3423 		qed_chain_consume(&qp->rq.pbl);
3424 	qedr_inc_sw_cons(&qp->rq);
3425 
3426 	return 1;
3427 }
3428 
3429 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3430 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3431 {
3432 	u16 cnt = 0;
3433 
3434 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3435 		/* fill WC */
3436 		wc->status = IB_WC_WR_FLUSH_ERR;
3437 		wc->vendor_err = 0;
3438 		wc->wc_flags = 0;
3439 		wc->src_qp = qp->id;
3440 		wc->byte_len = 0;
3441 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3442 		wc->qp = &qp->ibqp;
3443 		num_entries--;
3444 		wc++;
3445 		cnt++;
3446 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3447 			qed_chain_consume(&qp->rq.pbl);
3448 		qedr_inc_sw_cons(&qp->rq);
3449 	}
3450 
3451 	return cnt;
3452 }
3453 
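/* Consume the responder CQE only once all the RQ WRs it covers have been
 * reported; if the poll ran out of entries first, the CQE is left in place
 * so the remaining flushed WRs can be reported on the next poll.
 */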
3454 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3455 				 struct rdma_cqe_responder *resp, int *update)
3456 {
3457 	if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
3458 		consume_cqe(cq);
3459 		*update |= 1;
3460 	}
3461 }
3462 
3463 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3464 			     struct qedr_cq *cq, int num_entries,
3465 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
3466 			     int *update)
3467 {
3468 	int cnt;
3469 
3470 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3471 		cnt = process_resp_flush(qp, cq, num_entries, wc,
3472 					 resp->rq_cons);
3473 		try_consume_resp_cqe(cq, qp, resp, update);
3474 	} else {
3475 		cnt = process_resp_one(dev, qp, cq, wc, resp);
3476 		consume_cqe(cq);
3477 		*update |= 1;
3478 	}
3479 
3480 	return cnt;
3481 }
3482 
3483 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3484 				struct rdma_cqe_requester *req, int *update)
3485 {
3486 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3487 		consume_cqe(cq);
3488 		*update |= 1;
3489 	}
3490 }
3491 
3492 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3493 {
3494 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3495 	struct qedr_cq *cq = get_qedr_cq(ibcq);
3496 	union rdma_cqe *cqe = cq->latest_cqe;
3497 	u32 old_cons, new_cons;
3498 	unsigned long flags;
3499 	int update = 0;
3500 	int done = 0;
3501 
3502 	if (cq->destroyed) {
3503 		DP_ERR(dev,
3504 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3505 		       cq, cq->icid);
3506 		return 0;
3507 	}
3508 
3509 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3510 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3511 
3512 	spin_lock_irqsave(&cq->cq_lock, flags);
3513 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3514 	while (num_entries && is_valid_cqe(cq, cqe)) {
3515 		struct qedr_qp *qp;
3516 		int cnt = 0;
3517 
3518 		/* prevent speculative reads of any field of CQE */
3519 		rmb();
3520 
3521 		qp = cqe_get_qp(cqe);
3522 		if (!qp) {
3523 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3524 			break;
3525 		}
3526 
3527 		wc->qp = &qp->ibqp;
3528 
3529 		switch (cqe_get_type(cqe)) {
3530 		case RDMA_CQE_TYPE_REQUESTER:
3531 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3532 					       &cqe->req);
3533 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
3534 			break;
3535 		case RDMA_CQE_TYPE_RESPONDER_RQ:
3536 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3537 						&cqe->resp, &update);
3538 			break;
3539 		case RDMA_CQE_TYPE_INVALID:
3540 		default:
3541 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
3542 			       cqe_get_type(cqe));
3543 		}
3544 		num_entries -= cnt;
3545 		wc += cnt;
3546 		done += cnt;
3547 
3548 		cqe = get_cqe(cq);
3549 	}
3550 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3551 
3552 	cq->cq_cons += new_cons - old_cons;
3553 
3554 	if (update)
3555 		/* The doorbell notifies about the latest VALID entry,
3556 		 * but the chain already points to the next INVALID one.
3557 		 */
3558 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3559 
3560 	spin_unlock_irqrestore(&cq->cq_lock, flags);
3561 	return done;
3562 }
3563 
3564 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3565 		     u8 port_num,
3566 		     const struct ib_wc *in_wc,
3567 		     const struct ib_grh *in_grh,
3568 		     const struct ib_mad_hdr *mad_hdr,
3569 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
3570 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
3571 {
3572 	struct qedr_dev *dev = get_qedr_dev(ibdev);
3573 
3574 	DP_DEBUG(dev, QEDR_MSG_GSI,
3575 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3576 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3577 		 mad_hdr->class_specific, mad_hdr->class_version,
3578 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3579 	return IB_MAD_RESULT_SUCCESS;
3580 }
3581 
3582 int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
3583 			struct ib_port_immutable *immutable)
3584 {
3585 	struct ib_port_attr attr;
3586 	int err;
3587 
3588 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
3589 				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
3590 
3591 	err = ib_query_port(ibdev, port_num, &attr);
3592 	if (err)
3593 		return err;
3594 
3595 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
3596 	immutable->gid_tbl_len = attr.gid_tbl_len;
3597 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
3598 
3599 	return 0;
3600 }
3601