xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision a977d045)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_cm.h"
53 
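/* Scale a DQ PWM doorbell offset into a doorbell BAR offset (left shift by
 * DB_PWM_ADDR_OFFSET_SHIFT).
 */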
54 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 
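/* Copy a response to user space, clamping the length to udata->outlen so a
 * caller that supplied a smaller output buffer is not overrun.
 */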
56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
57 					size_t len)
58 {
59 	size_t min_len = min_t(size_t, len, udata->outlen);
60 
61 	return ib_copy_to_udata(udata, src, min_len);
62 }
63 
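/* The device exposes a single default P_Key; any in-range index returns it. */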
64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
65 {
66 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
67 		return -EINVAL;
68 
69 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
70 	return 0;
71 }
72 
73 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
74 		   union ib_gid *sgid)
75 {
76 	struct qedr_dev *dev = get_qedr_dev(ibdev);
77 	int rc = 0;
78 
79 	if (!rdma_cap_roce_gid_table(ibdev, port))
80 		return -ENODEV;
81 
82 	rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
83 	if (rc == -EAGAIN) {
84 		memcpy(sgid, &zgid, sizeof(*sgid));
85 		return 0;
86 	}
87 
88 	DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
89 		 sgid->global.interface_id, sgid->global.subnet_prefix);
90 
91 	return rc;
92 }
93 
94 int qedr_add_gid(struct ib_device *device, u8 port_num,
95 		 unsigned int index, const union ib_gid *gid,
96 		 const struct ib_gid_attr *attr, void **context)
97 {
98 	if (!rdma_cap_roce_gid_table(device, port_num))
99 		return -EINVAL;
100 
101 	if (port_num > QEDR_MAX_PORT)
102 		return -EINVAL;
103 
104 	if (!context)
105 		return -EINVAL;
106 
107 	return 0;
108 }
109 
110 int qedr_del_gid(struct ib_device *device, u8 port_num,
111 		 unsigned int index, void **context)
112 {
113 	if (!rdma_cap_roce_gid_table(device, port_num))
114 		return -EINVAL;
115 
116 	if (port_num > QEDR_MAX_PORT)
117 		return -EINVAL;
118 
119 	if (!context)
120 		return -EINVAL;
121 
122 	return 0;
123 }
124 
125 int qedr_query_device(struct ib_device *ibdev,
126 		      struct ib_device_attr *attr, struct ib_udata *udata)
127 {
128 	struct qedr_dev *dev = get_qedr_dev(ibdev);
129 	struct qedr_device_attr *qattr = &dev->attr;
130 
131 	if (!dev->rdma_ctx) {
132 		DP_ERR(dev,
133 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
134 		       dev->rdma_ctx);
135 		return -EINVAL;
136 	}
137 
138 	memset(attr, 0, sizeof(*attr));
139 
140 	attr->fw_ver = qattr->fw_ver;
141 	attr->sys_image_guid = qattr->sys_image_guid;
142 	attr->max_mr_size = qattr->max_mr_size;
143 	attr->page_size_cap = qattr->page_size_caps;
144 	attr->vendor_id = qattr->vendor_id;
145 	attr->vendor_part_id = qattr->vendor_part_id;
146 	attr->hw_ver = qattr->hw_ver;
147 	attr->max_qp = qattr->max_qp;
148 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
149 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
150 	    IB_DEVICE_RC_RNR_NAK_GEN |
151 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
152 
153 	attr->max_sge = qattr->max_sge;
154 	attr->max_sge_rd = qattr->max_sge;
155 	attr->max_cq = qattr->max_cq;
156 	attr->max_cqe = qattr->max_cqe;
157 	attr->max_mr = qattr->max_mr;
158 	attr->max_mw = qattr->max_mw;
159 	attr->max_pd = qattr->max_pd;
160 	attr->atomic_cap = dev->atomic_cap;
161 	attr->max_fmr = qattr->max_fmr;
162 	attr->max_map_per_fmr = 16;
163 	attr->max_qp_init_rd_atom =
164 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
165 	attr->max_qp_rd_atom =
166 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
167 		attr->max_qp_init_rd_atom);
168 
169 	attr->max_srq = qattr->max_srq;
170 	attr->max_srq_sge = qattr->max_srq_sge;
171 	attr->max_srq_wr = qattr->max_srq_wr;
172 
173 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
174 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
175 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
176 	attr->max_ah = qattr->max_ah;
177 
178 	return 0;
179 }
180 
181 #define QEDR_SPEED_SDR		(1)
182 #define QEDR_SPEED_DDR		(2)
183 #define QEDR_SPEED_QDR		(4)
184 #define QEDR_SPEED_FDR10	(8)
185 #define QEDR_SPEED_FDR		(16)
186 #define QEDR_SPEED_EDR		(32)
187 
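/* Map the Ethernet link speed (in Mbps) to the closest IB speed/width pair;
 * unknown speeds fall back to SDR x1.
 */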
188 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
189 					    u8 *ib_width)
190 {
191 	switch (speed) {
192 	case 1000:
193 		*ib_speed = QEDR_SPEED_SDR;
194 		*ib_width = IB_WIDTH_1X;
195 		break;
196 	case 10000:
197 		*ib_speed = QEDR_SPEED_QDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 20000:
202 		*ib_speed = QEDR_SPEED_DDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	case 25000:
207 		*ib_speed = QEDR_SPEED_EDR;
208 		*ib_width = IB_WIDTH_1X;
209 		break;
210 
211 	case 40000:
212 		*ib_speed = QEDR_SPEED_QDR;
213 		*ib_width = IB_WIDTH_4X;
214 		break;
215 
216 	case 50000:
217 		*ib_speed = QEDR_SPEED_QDR;
218 		*ib_width = IB_WIDTH_4X;
219 		break;
220 
221 	case 100000:
222 		*ib_speed = QEDR_SPEED_EDR;
223 		*ib_width = IB_WIDTH_4X;
224 		break;
225 
226 	default:
227 		/* Unsupported */
228 		*ib_speed = QEDR_SPEED_SDR;
229 		*ib_width = IB_WIDTH_1X;
230 	}
231 }
232 
233 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
234 {
235 	struct qedr_dev *dev;
236 	struct qed_rdma_port *rdma_port;
237 
238 	dev = get_qedr_dev(ibdev);
239 	if (port > 1) {
240 		DP_ERR(dev, "invalid_port=0x%x\n", port);
241 		return -EINVAL;
242 	}
243 
244 	if (!dev->rdma_ctx) {
245 		DP_ERR(dev, "rdma_ctx is NULL\n");
246 		return -EINVAL;
247 	}
248 
249 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
250 
251 	/* *attr being zeroed by the caller, avoid zeroing it here */
252 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
253 		attr->state = IB_PORT_ACTIVE;
254 		attr->phys_state = 5;
255 	} else {
256 		attr->state = IB_PORT_DOWN;
257 		attr->phys_state = 3;
258 	}
259 	attr->max_mtu = IB_MTU_4096;
260 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
261 	attr->lid = 0;
262 	attr->lmc = 0;
263 	attr->sm_lid = 0;
264 	attr->sm_sl = 0;
265 	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
266 	attr->gid_tbl_len = QEDR_MAX_SGID;
267 	attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
268 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
269 	attr->qkey_viol_cntr = 0;
270 	get_link_speed_and_width(rdma_port->link_speed,
271 				 &attr->active_speed, &attr->active_width);
272 	attr->max_msg_sz = rdma_port->max_msg_size;
273 	attr->max_vl_num = 4;
274 
275 	return 0;
276 }
277 
278 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
279 		     struct ib_port_modify *props)
280 {
281 	struct qedr_dev *dev;
282 
283 	dev = get_qedr_dev(ibdev);
284 	if (port > 1) {
285 		DP_ERR(dev, "invalid_port=0x%x\n", port);
286 		return -EINVAL;
287 	}
288 
289 	return 0;
290 }
291 
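/* Remember a physical range handed out to user space so that a later
 * qedr_mmap() call can validate the requested offset and length against it.
 */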
292 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
293 			 unsigned long len)
294 {
295 	struct qedr_mm *mm;
296 
297 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
298 	if (!mm)
299 		return -ENOMEM;
300 
301 	mm->key.phy_addr = phy_addr;
302 	/* This function might be called with a length which is not a multiple
303 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
304 	 * forces this granularity by increasing the requested size if needed.
305 	 * When qedr_mmap is called, it will search the list with the updated
306 	 * length as a key. To prevent search failures, the length is rounded up
307 	 * in advance to PAGE_SIZE.
308 	 */
309 	mm->key.len = roundup(len, PAGE_SIZE);
310 	INIT_LIST_HEAD(&mm->entry);
311 
312 	mutex_lock(&uctx->mm_list_lock);
313 	list_add(&mm->entry, &uctx->mm_head);
314 	mutex_unlock(&uctx->mm_list_lock);
315 
316 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
317 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
318 		 (unsigned long long)mm->key.phy_addr,
319 		 (unsigned long)mm->key.len, uctx);
320 
321 	return 0;
322 }
323 
324 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
325 			     unsigned long len)
326 {
327 	bool found = false;
328 	struct qedr_mm *mm;
329 
330 	mutex_lock(&uctx->mm_list_lock);
331 	list_for_each_entry(mm, &uctx->mm_head, entry) {
332 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
333 			continue;
334 
335 		found = true;
336 		break;
337 	}
338 	mutex_unlock(&uctx->mm_list_lock);
339 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
340 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
341 		 phy_addr, len, uctx, found);
342 
343 	return found;
344 }
345 
346 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
347 					struct ib_udata *udata)
348 {
349 	int rc;
350 	struct qedr_ucontext *ctx;
351 	struct qedr_alloc_ucontext_resp uresp;
352 	struct qedr_dev *dev = get_qedr_dev(ibdev);
353 	struct qed_rdma_add_user_out_params oparams;
354 
355 	if (!udata)
356 		return ERR_PTR(-EFAULT);
357 
358 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
359 	if (!ctx)
360 		return ERR_PTR(-ENOMEM);
361 
362 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
363 	if (rc) {
364 		DP_ERR(dev,
365 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
366 		       rc);
367 		goto err;
368 	}
369 
370 	ctx->dpi = oparams.dpi;
371 	ctx->dpi_addr = oparams.dpi_addr;
372 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
373 	ctx->dpi_size = oparams.dpi_size;
374 	INIT_LIST_HEAD(&ctx->mm_head);
375 	mutex_init(&ctx->mm_list_lock);
376 
377 	memset(&uresp, 0, sizeof(uresp));
378 
379 	uresp.db_pa = ctx->dpi_phys_addr;
380 	uresp.db_size = ctx->dpi_size;
381 	uresp.max_send_wr = dev->attr.max_sqe;
382 	uresp.max_recv_wr = dev->attr.max_rqe;
383 	uresp.max_srq_wr = dev->attr.max_srq_wr;
384 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
385 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
386 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
387 	uresp.max_cqes = QEDR_MAX_CQES;
388 
389 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
390 	if (rc)
391 		goto err;
392 
393 	ctx->dev = dev;
394 
395 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
396 	if (rc)
397 		goto err;
398 
399 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
400 		 &ctx->ibucontext);
401 	return &ctx->ibucontext;
402 
403 err:
404 	kfree(ctx);
405 	return ERR_PTR(rc);
406 }
407 
408 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
409 {
410 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
411 	struct qedr_mm *mm, *tmp;
412 	int status = 0;
413 
414 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
415 		 uctx);
416 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
417 
418 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
419 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
420 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
421 			 mm->key.phy_addr, mm->key.len, uctx);
422 		list_del(&mm->entry);
423 		kfree(mm);
424 	}
425 
426 	kfree(uctx);
427 	return status;
428 }
429 
430 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
431 {
432 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
433 	struct qedr_dev *dev = get_qedr_dev(context->device);
434 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
435 	u64 unmapped_db = dev->db_phys_addr;
436 	unsigned long len = (vma->vm_end - vma->vm_start);
437 	int rc = 0;
438 	bool found;
439 
440 	DP_DEBUG(dev, QEDR_MSG_INIT,
441 		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
442 		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
443 	if (vma->vm_start & (PAGE_SIZE - 1)) {
444 		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
445 		       vma->vm_start);
446 		return -EINVAL;
447 	}
448 
449 	found = qedr_search_mmap(ucontext, vm_page, len);
450 	if (!found) {
451 		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
452 		       vma->vm_pgoff);
453 		return -EINVAL;
454 	}
455 
456 	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
457 
458 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
459 						     dev->db_size))) {
460 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
461 		if (vma->vm_flags & VM_READ) {
462 			DP_ERR(dev, "Trying to map doorbell bar for read\n");
463 			return -EPERM;
464 		}
465 
466 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
467 
468 		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
469 					PAGE_SIZE, vma->vm_page_prot);
470 	} else {
471 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
472 		rc = remap_pfn_range(vma, vma->vm_start,
473 				     vma->vm_pgoff, len, vma->vm_page_prot);
474 	}
475 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
476 	return rc;
477 }
478 
479 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
480 			    struct ib_ucontext *context, struct ib_udata *udata)
481 {
482 	struct qedr_dev *dev = get_qedr_dev(ibdev);
483 	struct qedr_pd *pd;
484 	u16 pd_id;
485 	int rc;
486 
487 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
488 		 (udata && context) ? "User Lib" : "Kernel");
489 
490 	if (!dev->rdma_ctx) {
491 		DP_ERR(dev, "invalid RDMA context\n");
492 		return ERR_PTR(-EINVAL);
493 	}
494 
495 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
496 	if (!pd)
497 		return ERR_PTR(-ENOMEM);
498 
499 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
500 	if (rc)
501 		goto err;
502 
503 	pd->pd_id = pd_id;
504 
505 	if (udata && context) {
506 		struct qedr_alloc_pd_uresp uresp;
507 
508 		uresp.pd_id = pd_id;
509 
510 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
511 		if (rc) {
512 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
513 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
514 			goto err;
515 		}
516 
517 		pd->uctx = get_qedr_ucontext(context);
518 		pd->uctx->pd = pd;
519 	}
520 
521 	return &pd->ibpd;
522 
523 err:
524 	kfree(pd);
525 	return ERR_PTR(rc);
526 }
527 
528 int qedr_dealloc_pd(struct ib_pd *ibpd)
529 {
530 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
531 	struct qedr_pd *pd = get_qedr_pd(ibpd);
532 
533 	if (!pd) {
534 		pr_err("Invalid PD received in dealloc_pd\n");
535 		return -EINVAL;
536 	}
537 
538 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
539 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
540 
541 	kfree(pd);
542 
543 	return 0;
544 }
545 
546 static void qedr_free_pbl(struct qedr_dev *dev,
547 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
548 {
549 	struct pci_dev *pdev = dev->pdev;
550 	int i;
551 
552 	for (i = 0; i < pbl_info->num_pbls; i++) {
553 		if (!pbl[i].va)
554 			continue;
555 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
556 				  pbl[i].va, pbl[i].pa);
557 	}
558 
559 	kfree(pbl);
560 }
561 
562 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
563 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
564 
565 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
566 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
567 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
568 
569 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
570 					   struct qedr_pbl_info *pbl_info,
571 					   gfp_t flags)
572 {
573 	struct pci_dev *pdev = dev->pdev;
574 	struct qedr_pbl *pbl_table;
575 	dma_addr_t *pbl_main_tbl;
576 	dma_addr_t pa;
577 	void *va;
578 	int i;
579 
580 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
581 	if (!pbl_table)
582 		return ERR_PTR(-ENOMEM);
583 
584 	for (i = 0; i < pbl_info->num_pbls; i++) {
585 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
586 					&pa, flags);
587 		if (!va)
588 			goto err;
589 
590 		memset(va, 0, pbl_info->pbl_size);
591 		pbl_table[i].va = va;
592 		pbl_table[i].pa = pa;
593 	}
594 
595 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
596 	 * the first one with physical pointers to all of the rest
597 	 */
598 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
599 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
600 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
601 
602 	return pbl_table;
603 
604 err:
605 	for (i--; i >= 0; i--)
606 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
607 				  pbl_table[i].va, pbl_table[i].pa);
608 
609 	qedr_free_pbl(dev, pbl_info, pbl_table);
610 
611 	return ERR_PTR(-ENOMEM);
612 }
613 
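/* Choose between a one- and two-layer PBL layout and compute the PBL page
 * size and page count needed to describe num_pbes buffer pages.
 */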
614 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
615 				struct qedr_pbl_info *pbl_info,
616 				u32 num_pbes, int two_layer_capable)
617 {
618 	u32 pbl_capacity;
619 	u32 pbl_size;
620 	u32 num_pbls;
621 
622 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
623 		if (num_pbes > MAX_PBES_TWO_LAYER) {
624 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
625 			       num_pbes);
626 			return -EINVAL;
627 		}
628 
629 		/* calculate required pbl page size */
630 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
631 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
632 			       NUM_PBES_ON_PAGE(pbl_size);
633 
634 		while (pbl_capacity < num_pbes) {
635 			pbl_size *= 2;
636 			pbl_capacity = pbl_size / sizeof(u64);
637 			pbl_capacity = pbl_capacity * pbl_capacity;
638 		}
639 
640 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
641 		num_pbls++;	/* One for the layer0 (points to the PBLs) */
642 		pbl_info->two_layered = true;
643 	} else {
644 		/* One layered PBL */
645 		num_pbls = 1;
646 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
647 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
648 		pbl_info->two_layered = false;
649 	}
650 
651 	pbl_info->num_pbls = num_pbls;
652 	pbl_info->pbl_size = pbl_size;
653 	pbl_info->num_pbes = num_pbes;
654 
655 	DP_DEBUG(dev, QEDR_MSG_MR,
656 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
657 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
658 
659 	return 0;
660 }
661 
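/* Walk the umem SG list and write one PBE per FW-sized page into the PBL
 * pages, moving on to the next PBL page whenever the current one fills up.
 */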
662 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
663 			       struct qedr_pbl *pbl,
664 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
665 {
666 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
667 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
668 	struct qedr_pbl *pbl_tbl;
669 	struct scatterlist *sg;
670 	struct regpair *pbe;
671 	u64 pg_addr;
672 	int entry;
673 
674 	if (!pbl_info->num_pbes)
675 		return;
676 
677 	/* If we have a two layered pbl, the first pbl points to the rest
678 	 * of the pbls and the first entry lays on the second pbl in the table
679 	 */
680 	if (pbl_info->two_layered)
681 		pbl_tbl = &pbl[1];
682 	else
683 		pbl_tbl = pbl;
684 
685 	pbe = (struct regpair *)pbl_tbl->va;
686 	if (!pbe) {
687 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
688 		return;
689 	}
690 
691 	pbe_cnt = 0;
692 
693 	shift = umem->page_shift;
694 
695 	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
696 
697 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
698 		pages = sg_dma_len(sg) >> shift;
699 		pg_addr = sg_dma_address(sg);
700 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
701 			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
702 				pbe->lo = cpu_to_le32(pg_addr);
703 				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
704 
705 				pg_addr += BIT(pg_shift);
706 				pbe_cnt++;
707 				total_num_pbes++;
708 				pbe++;
709 
710 				if (total_num_pbes == pbl_info->num_pbes)
711 					return;
712 
713 				/* If the given pbl is full storing the pbes,
714 				 * move to next pbl.
715 				 */
716 				if (pbe_cnt ==
717 				    (pbl_info->pbl_size / sizeof(u64))) {
718 					pbl_tbl++;
719 					pbe = (struct regpair *)pbl_tbl->va;
720 					pbe_cnt = 0;
721 				}
722 
723 				fw_pg_cnt++;
724 			}
725 		}
726 	}
727 }
728 
729 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
730 			      struct qedr_cq *cq, struct ib_udata *udata)
731 {
732 	struct qedr_create_cq_uresp uresp;
733 	int rc;
734 
735 	memset(&uresp, 0, sizeof(uresp));
736 
737 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
738 	uresp.icid = cq->icid;
739 
740 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
741 	if (rc)
742 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
743 
744 	return rc;
745 }
746 
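/* Advance to the next CQE in the chain, flipping the expected toggle bit
 * when the last chain element is consumed (wrap-around).
 */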
747 static void consume_cqe(struct qedr_cq *cq)
748 {
749 	if (cq->latest_cqe == cq->toggle_cqe)
750 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
751 
752 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
753 }
754 
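/* Round the requested CQE count up so the CQ buffer fills whole pages. */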
755 static inline int qedr_align_cq_entries(int entries)
756 {
757 	u64 size, aligned_size;
758 
759 	/* We allocate an extra entry that we don't report to the FW. */
760 	size = (entries + 1) * QEDR_CQE_SIZE;
761 	aligned_size = ALIGN(size, PAGE_SIZE);
762 
763 	return aligned_size / QEDR_CQE_SIZE;
764 }
765 
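/* Pin the user buffer with ib_umem_get() and build a PBL that describes it
 * in FW page granularity.
 */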
766 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
767 				       struct qedr_dev *dev,
768 				       struct qedr_userq *q,
769 				       u64 buf_addr, size_t buf_len,
770 				       int access, int dmasync)
771 {
772 	u32 fw_pages;
773 	int rc;
774 
775 	q->buf_addr = buf_addr;
776 	q->buf_len = buf_len;
777 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
778 	if (IS_ERR(q->umem)) {
779 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
780 		       PTR_ERR(q->umem));
781 		return PTR_ERR(q->umem);
782 	}
783 
784 	fw_pages = ib_umem_page_count(q->umem) <<
785 	    (q->umem->page_shift - FW_PAGE_SHIFT);
786 
787 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
788 	if (rc)
789 		goto err0;
790 
791 	q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
792 	if (IS_ERR(q->pbl_tbl)) {
793 		rc = PTR_ERR(q->pbl_tbl);
794 		goto err0;
795 	}
796 
797 	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
798 			   FW_PAGE_SHIFT);
799 
800 	return 0;
801 
802 err0:
803 	ib_umem_release(q->umem);
804 
805 	return rc;
806 }
807 
808 static inline void qedr_init_cq_params(struct qedr_cq *cq,
809 				       struct qedr_ucontext *ctx,
810 				       struct qedr_dev *dev, int vector,
811 				       int chain_entries, int page_cnt,
812 				       u64 pbl_ptr,
813 				       struct qed_rdma_create_cq_in_params
814 				       *params)
815 {
816 	memset(params, 0, sizeof(*params));
817 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
818 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
819 	params->cnq_id = vector;
820 	params->cq_size = chain_entries - 1;
821 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
822 	params->pbl_num_pages = page_cnt;
823 	params->pbl_ptr = pbl_ptr;
824 	params->pbl_two_level = 0;
825 }
826 
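/* Write the consumer index and aggregation flags to the CQ doorbell. */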
827 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
828 {
829 	/* Flush data before signalling doorbell */
830 	wmb();
831 	cq->db.data.agg_flags = flags;
832 	cq->db.data.value = cpu_to_le32(cons);
833 	writeq(cq->db.raw, cq->db_addr);
834 
835 	/* Make sure write would stick */
836 	mmiowb();
837 }
838 
839 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
840 {
841 	struct qedr_cq *cq = get_qedr_cq(ibcq);
842 	unsigned long sflags;
843 	struct qedr_dev *dev;
844 
845 	dev = get_qedr_dev(ibcq->device);
846 
847 	if (cq->destroyed) {
848 		DP_ERR(dev,
849 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
850 		       cq, cq->icid);
851 		return -EINVAL;
852 	}
853
855 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
856 		return 0;
857 
858 	spin_lock_irqsave(&cq->cq_lock, sflags);
859 
860 	cq->arm_flags = 0;
861 
862 	if (flags & IB_CQ_SOLICITED)
863 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
864 
865 	if (flags & IB_CQ_NEXT_COMP)
866 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
867 
868 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
869 
870 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
871 
872 	return 0;
873 }
874 
875 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
876 			     const struct ib_cq_init_attr *attr,
877 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
878 {
879 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
880 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
881 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
882 	struct qedr_dev *dev = get_qedr_dev(ibdev);
883 	struct qed_rdma_create_cq_in_params params;
884 	struct qedr_create_cq_ureq ureq;
885 	int vector = attr->comp_vector;
886 	int entries = attr->cqe;
887 	struct qedr_cq *cq;
888 	int chain_entries;
889 	int page_cnt;
890 	u64 pbl_ptr;
891 	u16 icid;
892 	int rc;
893 
894 	DP_DEBUG(dev, QEDR_MSG_INIT,
895 		 "create_cq: called from %s. entries=%d, vector=%d\n",
896 		 udata ? "User Lib" : "Kernel", entries, vector);
897 
898 	if (entries > QEDR_MAX_CQES) {
899 		DP_ERR(dev,
900 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
901 		       entries, QEDR_MAX_CQES);
902 		return ERR_PTR(-EINVAL);
903 	}
904 
905 	chain_entries = qedr_align_cq_entries(entries);
906 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
907 
908 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
909 	if (!cq)
910 		return ERR_PTR(-ENOMEM);
911 
912 	if (udata) {
913 		memset(&ureq, 0, sizeof(ureq));
914 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
915 			DP_ERR(dev,
916 			       "create cq: problem copying data from user space\n");
917 			goto err0;
918 		}
919 
920 		if (!ureq.len) {
921 			DP_ERR(dev,
922 			       "create cq: cannot create a cq with 0 entries\n");
923 			goto err0;
924 		}
925 
926 		cq->cq_type = QEDR_CQ_TYPE_USER;
927 
928 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
929 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
930 		if (rc)
931 			goto err0;
932 
933 		pbl_ptr = cq->q.pbl_tbl->pa;
934 		page_cnt = cq->q.pbl_info.num_pbes;
935 
936 		cq->ibcq.cqe = chain_entries;
937 	} else {
938 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
939 
940 		rc = dev->ops->common->chain_alloc(dev->cdev,
941 						   QED_CHAIN_USE_TO_CONSUME,
942 						   QED_CHAIN_MODE_PBL,
943 						   QED_CHAIN_CNT_TYPE_U32,
944 						   chain_entries,
945 						   sizeof(union rdma_cqe),
946 						   &cq->pbl, NULL);
947 		if (rc)
948 			goto err1;
949 
950 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
951 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
952 		cq->ibcq.cqe = cq->pbl.capacity;
953 	}
954 
955 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
956 			    pbl_ptr, &params);
957 
958 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
959 	if (rc)
960 		goto err2;
961 
962 	cq->icid = icid;
963 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
964 	spin_lock_init(&cq->cq_lock);
965 
966 	if (ib_ctx) {
967 		rc = qedr_copy_cq_uresp(dev, cq, udata);
968 		if (rc)
969 			goto err3;
970 	} else {
971 		/* Generate doorbell address. */
972 		cq->db_addr = dev->db_addr +
973 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
974 		cq->db.data.icid = cq->icid;
975 		cq->db.data.params = DB_AGG_CMD_SET <<
976 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
977 
978 		/* Point to the very last element; once we pass it, we toggle */
979 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
980 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
981 		cq->latest_cqe = NULL;
982 		consume_cqe(cq);
983 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
984 	}
985 
986 	DP_DEBUG(dev, QEDR_MSG_CQ,
987 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
988 		 cq->icid, cq, params.cq_size);
989 
990 	return &cq->ibcq;
991 
992 err3:
993 	destroy_iparams.icid = cq->icid;
994 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
995 				  &destroy_oparams);
996 err2:
997 	if (udata)
998 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
999 	else
1000 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1001 err1:
1002 	if (udata)
1003 		ib_umem_release(cq->q.umem);
1004 err0:
1005 	kfree(cq);
1006 	return ERR_PTR(-EINVAL);
1007 }
1008 
1009 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1010 {
1011 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1012 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1013 
1014 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1015 
1016 	return 0;
1017 }
1018 
1019 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1020 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1021 
1022 int qedr_destroy_cq(struct ib_cq *ibcq)
1023 {
1024 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1025 	struct qed_rdma_destroy_cq_out_params oparams;
1026 	struct qed_rdma_destroy_cq_in_params iparams;
1027 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1028 	int iter;
1029 	int rc;
1030 
1031 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1032 
1033 	cq->destroyed = 1;
1034 
1035 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1036 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1037 		goto done;
1038 
1039 	iparams.icid = cq->icid;
1040 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1041 	if (rc)
1042 		return rc;
1043 
1044 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1045 
1046 	if (ibcq->uobject && ibcq->uobject->context) {
1047 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1048 		ib_umem_release(cq->q.umem);
1049 	}
1050 
1051 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1052 	 * wait until all CNQ interrupts, if any, are received. This will always
1053 	 * happen and will always happen very fast. If not, then a serious error
1054 	 * has occurred. That is why we can use a long delay.
1055 	 * We spin for a short time so we don't lose time on context switching
1056 	 * in case all the completions are handled in that span. Otherwise
1057 	 * we sleep for a while and check again. Since the CNQ may be
1058 	 * associated with (only) the current CPU we use msleep to allow the
1059 	 * current CPU to be freed.
1060 	 * The CNQ notification is increased in qedr_irq_handler().
1061 	 */
1062 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1063 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1064 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1065 		iter--;
1066 	}
1067 
1068 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1069 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1070 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1071 		iter--;
1072 	}
1073 
1074 	if (oparams.num_cq_notif != cq->cnq_notif)
1075 		goto err;
1076 
1077 	/* Note that we don't need to have explicit code to wait for the
1078 	 * completion of the event handler because it is invoked from the EQ.
1079 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1080 	 * be certain that there's no event handler in process.
1081 	 */
1082 done:
1083 	cq->sig = ~cq->sig;
1084 
1085 	kfree(cq);
1086 
1087 	return 0;
1088 
1089 err:
1090 	DP_ERR(dev,
1091 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1092 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1093 
1094 	return -EINVAL;
1095 }
1096 
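/* Resolve the SGID referenced by the AH, derive the RoCE mode (v1, v2/IPv4
 * or v2/IPv6) and VLAN from it, and fill the source and destination GIDs in
 * the modify-QP parameters.
 */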
1097 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1098 					  struct ib_qp_attr *attr,
1099 					  int attr_mask,
1100 					  struct qed_rdma_modify_qp_in_params
1101 					  *qp_params)
1102 {
1103 	enum rdma_network_type nw_type;
1104 	struct ib_gid_attr gid_attr;
1105 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1106 	union ib_gid gid;
1107 	u32 ipv4_addr;
1108 	int rc = 0;
1109 	int i;
1110 
1111 	rc = ib_get_cached_gid(ibqp->device,
1112 			       rdma_ah_get_port_num(&attr->ah_attr),
1113 			       grh->sgid_index, &gid, &gid_attr);
1114 	if (rc)
1115 		return rc;
1116 
1117 	if (!memcmp(&gid, &zgid, sizeof(gid)))
1118 		return -ENOENT;
1119 
1120 	if (gid_attr.ndev) {
1121 		qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1122 
1123 		dev_put(gid_attr.ndev);
1124 		nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1125 		switch (nw_type) {
1126 		case RDMA_NETWORK_IPV6:
1127 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1128 			       sizeof(qp_params->sgid));
1129 			memcpy(&qp_params->dgid.bytes[0],
1130 			       &grh->dgid,
1131 			       sizeof(qp_params->dgid));
1132 			qp_params->roce_mode = ROCE_V2_IPV6;
1133 			SET_FIELD(qp_params->modify_flags,
1134 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1135 			break;
1136 		case RDMA_NETWORK_IB:
1137 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1138 			       sizeof(qp_params->sgid));
1139 			memcpy(&qp_params->dgid.bytes[0],
1140 			       &grh->dgid,
1141 			       sizeof(qp_params->dgid));
1142 			qp_params->roce_mode = ROCE_V1;
1143 			break;
1144 		case RDMA_NETWORK_IPV4:
1145 			memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1146 			memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1147 			ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1148 			qp_params->sgid.ipv4_addr = ipv4_addr;
1149 			ipv4_addr =
1150 			    qedr_get_ipv4_from_gid(grh->dgid.raw);
1151 			qp_params->dgid.ipv4_addr = ipv4_addr;
1152 			SET_FIELD(qp_params->modify_flags,
1153 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1154 			qp_params->roce_mode = ROCE_V2_IPV4;
1155 			break;
1156 		}
1157 	}
1158 
1159 	for (i = 0; i < 4; i++) {
1160 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1161 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1162 	}
1163 
1164 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1165 		qp_params->vlan_id = 0;
1166 
1167 	return 0;
1168 }
1169 
1170 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1171 			       struct ib_qp_init_attr *attrs)
1172 {
1173 	struct qedr_device_attr *qattr = &dev->attr;
1174 
1175 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1176 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1177 		DP_DEBUG(dev, QEDR_MSG_QP,
1178 			 "create qp: unsupported qp type=0x%x requested\n",
1179 			 attrs->qp_type);
1180 		return -EINVAL;
1181 	}
1182 
1183 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1184 		DP_ERR(dev,
1185 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1186 		       attrs->cap.max_send_wr, qattr->max_sqe);
1187 		return -EINVAL;
1188 	}
1189 
1190 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1191 		DP_ERR(dev,
1192 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1193 		       attrs->cap.max_inline_data, qattr->max_inline);
1194 		return -EINVAL;
1195 	}
1196 
1197 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1198 		DP_ERR(dev,
1199 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1200 		       attrs->cap.max_send_sge, qattr->max_sge);
1201 		return -EINVAL;
1202 	}
1203 
1204 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1205 		DP_ERR(dev,
1206 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1207 		       attrs->cap.max_recv_sge, qattr->max_sge);
1208 		return -EINVAL;
1209 	}
1210 
1211 	/* Unprivileged user space cannot create special QP */
1212 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1213 		DP_ERR(dev,
1214 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1215 		       attrs->qp_type);
1216 		return -EINVAL;
1217 	}
1218 
1219 	return 0;
1220 }
1221 
1222 static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
1223 			       struct qedr_qp *qp)
1224 {
1225 	uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1226 	uresp->rq_icid = qp->icid;
1227 }
1228 
1229 static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
1230 			       struct qedr_qp *qp)
1231 {
1232 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1233 	uresp->sq_icid = qp->icid + 1;
1234 }
1235 
1236 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1237 			      struct qedr_qp *qp, struct ib_udata *udata)
1238 {
1239 	struct qedr_create_qp_uresp uresp;
1240 	int rc;
1241 
1242 	memset(&uresp, 0, sizeof(uresp));
1243 	qedr_copy_sq_uresp(&uresp, qp);
1244 	qedr_copy_rq_uresp(&uresp, qp);
1245 
1246 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1247 	uresp.qp_id = qp->qp_id;
1248 
1249 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1250 	if (rc)
1251 		DP_ERR(dev,
1252 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1253 		       qp->icid);
1254 
1255 	return rc;
1256 }
1257 
1258 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1259 				      struct qedr_qp *qp,
1260 				      struct qedr_pd *pd,
1261 				      struct ib_qp_init_attr *attrs)
1262 {
1263 	spin_lock_init(&qp->q_lock);
1264 	qp->pd = pd;
1265 	qp->qp_type = attrs->qp_type;
1266 	qp->max_inline_data = attrs->cap.max_inline_data;
1267 	qp->sq.max_sges = attrs->cap.max_send_sge;
1268 	qp->state = QED_ROCE_QP_STATE_RESET;
1269 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1270 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1271 	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1272 	qp->dev = dev;
1273 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1274 
1275 	DP_DEBUG(dev, QEDR_MSG_QP,
1276 		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1277 		 qp->rq.max_sges, qp->rq_cq->icid);
1278 	DP_DEBUG(dev, QEDR_MSG_QP,
1279 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1280 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1281 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1282 	DP_DEBUG(dev, QEDR_MSG_QP,
1283 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1284 		 qp->sq.max_sges, qp->sq_cq->icid);
1285 }
1286 
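/* Derive the doorbell addresses for a kernel QP: icid + 1 addresses the SQ
 * (XCM) doorbell, icid itself the RQ (TCM) doorbell.
 */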
1287 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1288 {
1289 	qp->sq.db = dev->db_addr +
1290 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1291 	qp->sq.db_data.data.icid = qp->icid + 1;
1292 	qp->rq.db = dev->db_addr +
1293 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1294 	qp->rq.db_data.data.icid = qp->icid;
1295 }
1296 
1297 static inline void
1298 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1299 			      struct qedr_pd *pd,
1300 			      struct qedr_qp *qp,
1301 			      struct ib_qp_init_attr *attrs,
1302 			      bool fmr_and_reserved_lkey,
1303 			      struct qed_rdma_create_qp_in_params *params)
1304 {
1305 	/* QP handle to be written in an async event */
1306 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1307 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1308 
1309 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1310 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1311 	params->pd = pd->pd_id;
1312 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1313 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1314 	params->stats_queue = 0;
1315 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1316 	params->srq_id = 0;
1317 	params->use_srq = false;
1318 }
1319 
1320 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1321 {
1322 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1323 		 "qp=%p. "
1324 		 "sq_addr=0x%llx, "
1325 		 "sq_len=%zd, "
1326 		 "rq_addr=0x%llx, "
1327 		 "rq_len=%zd"
1328 		 "\n",
1329 		 qp,
1330 		 qp->usq.buf_addr,
1331 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1332 }
1333 
1334 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1335 {
1336 	if (qp->usq.umem)
1337 		ib_umem_release(qp->usq.umem);
1338 	qp->usq.umem = NULL;
1339 
1340 	if (qp->urq.umem)
1341 		ib_umem_release(qp->urq.umem);
1342 	qp->urq.umem = NULL;
1343 }
1344 
1345 static int qedr_create_user_qp(struct qedr_dev *dev,
1346 			       struct qedr_qp *qp,
1347 			       struct ib_pd *ibpd,
1348 			       struct ib_udata *udata,
1349 			       struct ib_qp_init_attr *attrs)
1350 {
1351 	struct qed_rdma_create_qp_in_params in_params;
1352 	struct qed_rdma_create_qp_out_params out_params;
1353 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1354 	struct ib_ucontext *ib_ctx = NULL;
1355 	struct qedr_ucontext *ctx = NULL;
1356 	struct qedr_create_qp_ureq ureq;
1357 	int rc = -EINVAL;
1358 
1359 	ib_ctx = ibpd->uobject->context;
1360 	ctx = get_qedr_ucontext(ib_ctx);
1361 
1362 	memset(&ureq, 0, sizeof(ureq));
1363 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1364 	if (rc) {
1365 		DP_ERR(dev, "Problem copying data from user space\n");
1366 		return rc;
1367 	}
1368 
1369 	/* SQ - read access only (0), dma sync not required (0) */
1370 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1371 				  ureq.sq_len, 0, 0);
1372 	if (rc)
1373 		return rc;
1374 
1375 	/* RQ - read access only (0), dma sync not required (0) */
1376 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1377 				  ureq.rq_len, 0, 0);
1378 
1379 	if (rc)
1380 		return rc;
1381 
1382 	memset(&in_params, 0, sizeof(in_params));
1383 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1384 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1385 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1386 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1387 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1388 	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1389 	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1390 
1391 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1392 					      &in_params, &out_params);
1393 
1394 	if (!qp->qed_qp) {
1395 		rc = -ENOMEM;
1396 		goto err1;
1397 	}
1398 
1399 	qp->qp_id = out_params.qp_id;
1400 	qp->icid = out_params.icid;
1401 
1402 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1403 	if (rc)
1404 		goto err;
1405 
1406 	qedr_qp_user_print(dev, qp);
1407 
1408 	return 0;
1409 err:
1410 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1411 	if (rc)
1412 		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1413 
1414 err1:
1415 	qedr_cleanup_user(dev, qp);
1416 	return rc;
1417 }
1418 
1419 static int
1420 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1421 			   struct qedr_qp *qp,
1422 			   struct qed_rdma_create_qp_in_params *in_params,
1423 			   u32 n_sq_elems, u32 n_rq_elems)
1424 {
1425 	struct qed_rdma_create_qp_out_params out_params;
1426 	int rc;
1427 
1428 	rc = dev->ops->common->chain_alloc(dev->cdev,
1429 					   QED_CHAIN_USE_TO_PRODUCE,
1430 					   QED_CHAIN_MODE_PBL,
1431 					   QED_CHAIN_CNT_TYPE_U32,
1432 					   n_sq_elems,
1433 					   QEDR_SQE_ELEMENT_SIZE,
1434 					   &qp->sq.pbl, NULL);
1435 
1436 	if (rc)
1437 		return rc;
1438 
1439 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1440 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1441 
1442 	rc = dev->ops->common->chain_alloc(dev->cdev,
1443 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1444 					   QED_CHAIN_MODE_PBL,
1445 					   QED_CHAIN_CNT_TYPE_U32,
1446 					   n_rq_elems,
1447 					   QEDR_RQE_ELEMENT_SIZE,
1448 					   &qp->rq.pbl, NULL);
1449 	if (rc)
1450 		return rc;
1451 
1452 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1453 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1454 
1455 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1456 					      in_params, &out_params);
1457 
1458 	if (!qp->qed_qp)
1459 		return -EINVAL;
1460 
1461 	qp->qp_id = out_params.qp_id;
1462 	qp->icid = out_params.icid;
1463 
1464 	qedr_set_roce_db_info(dev, qp);
1465 
1466 	return 0;
1467 }
1468 
1469 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1470 {
1471 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1472 	kfree(qp->wqe_wr_id);
1473 
1474 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1475 	kfree(qp->rqe_wr_id);
1476 }
1477 
1478 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1479 				 struct qedr_qp *qp,
1480 				 struct ib_pd *ibpd,
1481 				 struct ib_qp_init_attr *attrs)
1482 {
1483 	struct qed_rdma_create_qp_in_params in_params;
1484 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1485 	int rc = -EINVAL;
1486 	u32 n_rq_elems;
1487 	u32 n_sq_elems;
1488 	u32 n_sq_entries;
1489 
1490 	memset(&in_params, 0, sizeof(in_params));
1491 
1492 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1493 	 * the ring. The ring should allow at least a single WR, even if the
1494 	 * user requested none, due to allocation issues.
1495 	 * We should add an extra WR since the prod and cons indices of
1496 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1497 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1498 	 * double the number of entries due to an iSER issue that pushes far more
1499 	 * WRs than indicated. If we decline its ib_post_send() then we get
1500 	 * error prints in the dmesg we'd like to avoid.
1501 	 */
1502 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1503 			      dev->attr.max_sqe);
1504 
1505 	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
1506 				GFP_KERNEL);
1507 	if (!qp->wqe_wr_id) {
1508 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1509 		return -ENOMEM;
1510 	}
1511 
1512 	/* QP handle to be written in CQE */
1513 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1514 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1515 
1516 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1517 	 * the ring. The ring should allow at least a single WR, even if the
1518 	 * user requested none, due to allocation issues.
1519 	 */
1520 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1521 
1522 	/* Allocate driver internal RQ array */
1523 	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
1524 				GFP_KERNEL);
1525 	if (!qp->rqe_wr_id) {
1526 		DP_ERR(dev,
1527 		       "create qp: failed RQ shadow memory allocation\n");
1528 		kfree(qp->wqe_wr_id);
1529 		return -ENOMEM;
1530 	}
1531 
1532 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1533 
1534 	n_sq_entries = attrs->cap.max_send_wr;
1535 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1536 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1537 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1538 
1539 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1540 
1541 	rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1542 					n_sq_elems, n_rq_elems);
1543 	if (rc)
1544 		qedr_cleanup_kernel(dev, qp);
1545 
1546 	return rc;
1547 }
1548 
1549 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1550 			     struct ib_qp_init_attr *attrs,
1551 			     struct ib_udata *udata)
1552 {
1553 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1554 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1555 	struct qedr_qp *qp;
1556 	struct ib_qp *ibqp;
1557 	int rc = 0;
1558 
1559 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1560 		 udata ? "user library" : "kernel", pd);
1561 
1562 	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1563 	if (rc)
1564 		return ERR_PTR(rc);
1565 
1566 	if (attrs->srq)
1567 		return ERR_PTR(-EINVAL);
1568 
1569 	DP_DEBUG(dev, QEDR_MSG_QP,
1570 		 "create qp: called from %s, event_handler=%p, pd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1571 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1572 		 get_qedr_cq(attrs->send_cq),
1573 		 get_qedr_cq(attrs->send_cq)->icid,
1574 		 get_qedr_cq(attrs->recv_cq),
1575 		 get_qedr_cq(attrs->recv_cq)->icid);
1576 
1577 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1578 	if (!qp) {
1579 		DP_ERR(dev, "create qp: failed allocating memory\n");
1580 		return ERR_PTR(-ENOMEM);
1581 	}
1582 
1583 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1584 
1585 	if (attrs->qp_type == IB_QPT_GSI) {
1586 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1587 		if (IS_ERR(ibqp))
1588 			kfree(qp);
1589 		return ibqp;
1590 	}
1591 
1592 	if (udata)
1593 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1594 	else
1595 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1596 
1597 	if (rc)
1598 		goto err;
1599 
1600 	qp->ibqp.qp_num = qp->qp_id;
1601 
1602 	return &qp->ibqp;
1603 
1604 err:
1605 	kfree(qp);
1606 
1607 	return ERR_PTR(-EFAULT);
1608 }
1609 
1610 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1611 {
1612 	switch (qp_state) {
1613 	case QED_ROCE_QP_STATE_RESET:
1614 		return IB_QPS_RESET;
1615 	case QED_ROCE_QP_STATE_INIT:
1616 		return IB_QPS_INIT;
1617 	case QED_ROCE_QP_STATE_RTR:
1618 		return IB_QPS_RTR;
1619 	case QED_ROCE_QP_STATE_RTS:
1620 		return IB_QPS_RTS;
1621 	case QED_ROCE_QP_STATE_SQD:
1622 		return IB_QPS_SQD;
1623 	case QED_ROCE_QP_STATE_ERR:
1624 		return IB_QPS_ERR;
1625 	case QED_ROCE_QP_STATE_SQE:
1626 		return IB_QPS_SQE;
1627 	}
1628 	return IB_QPS_ERR;
1629 }
1630 
1631 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1632 					enum ib_qp_state qp_state)
1633 {
1634 	switch (qp_state) {
1635 	case IB_QPS_RESET:
1636 		return QED_ROCE_QP_STATE_RESET;
1637 	case IB_QPS_INIT:
1638 		return QED_ROCE_QP_STATE_INIT;
1639 	case IB_QPS_RTR:
1640 		return QED_ROCE_QP_STATE_RTR;
1641 	case IB_QPS_RTS:
1642 		return QED_ROCE_QP_STATE_RTS;
1643 	case IB_QPS_SQD:
1644 		return QED_ROCE_QP_STATE_SQD;
1645 	case IB_QPS_ERR:
1646 		return QED_ROCE_QP_STATE_ERR;
1647 	default:
1648 		return QED_ROCE_QP_STATE_ERR;
1649 	}
1650 }
1651 
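/* Reset the software view of a work queue: chain, producer/consumer indices
 * and doorbell data.
 */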
1652 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1653 {
1654 	qed_chain_reset(&qph->pbl);
1655 	qph->prod = 0;
1656 	qph->cons = 0;
1657 	qph->wqe_cons = 0;
1658 	qph->db_data.data.value = cpu_to_le16(0);
1659 }
1660 
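/* Validate a RoCE QP state transition against the current state and apply
 * its side effects (e.g. ringing the RQ doorbell on INIT->RTR).
 */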
1661 static int qedr_update_qp_state(struct qedr_dev *dev,
1662 				struct qedr_qp *qp,
1663 				enum qed_roce_qp_state new_state)
1664 {
1665 	int status = 0;
1666 
1667 	if (new_state == qp->state)
1668 		return 0;
1669 
1670 	switch (qp->state) {
1671 	case QED_ROCE_QP_STATE_RESET:
1672 		switch (new_state) {
1673 		case QED_ROCE_QP_STATE_INIT:
1674 			qp->prev_wqe_size = 0;
1675 			qedr_reset_qp_hwq_info(&qp->sq);
1676 			qedr_reset_qp_hwq_info(&qp->rq);
1677 			break;
1678 		default:
1679 			status = -EINVAL;
1680 			break;
1681 		}
1682 		break;
1683 	case QED_ROCE_QP_STATE_INIT:
1684 		switch (new_state) {
1685 		case QED_ROCE_QP_STATE_RTR:
1686 			/* Update doorbell (in case post_recv was
1687 			 * done before move to RTR)
1688 			 */
1689 			wmb();
1690 			writel(qp->rq.db_data.raw, qp->rq.db);
1691 			/* Make sure write takes effect */
1692 			mmiowb();
1693 			break;
1694 		case QED_ROCE_QP_STATE_ERR:
1695 			break;
1696 		default:
1697 			/* Invalid state change. */
1698 			status = -EINVAL;
1699 			break;
1700 		}
1701 		break;
1702 	case QED_ROCE_QP_STATE_RTR:
1703 		/* RTR->XXX */
1704 		switch (new_state) {
1705 		case QED_ROCE_QP_STATE_RTS:
1706 			break;
1707 		case QED_ROCE_QP_STATE_ERR:
1708 			break;
1709 		default:
1710 			/* Invalid state change. */
1711 			status = -EINVAL;
1712 			break;
1713 		}
1714 		break;
1715 	case QED_ROCE_QP_STATE_RTS:
1716 		/* RTS->XXX */
1717 		switch (new_state) {
1718 		case QED_ROCE_QP_STATE_SQD:
1719 			break;
1720 		case QED_ROCE_QP_STATE_ERR:
1721 			break;
1722 		default:
1723 			/* Invalid state change. */
1724 			status = -EINVAL;
1725 			break;
1726 		}
1727 		break;
1728 	case QED_ROCE_QP_STATE_SQD:
1729 		/* SQD->XXX */
1730 		switch (new_state) {
1731 		case QED_ROCE_QP_STATE_RTS:
1732 		case QED_ROCE_QP_STATE_ERR:
1733 			break;
1734 		default:
1735 			/* Invalid state change. */
1736 			status = -EINVAL;
1737 			break;
1738 		}
1739 		break;
1740 	case QED_ROCE_QP_STATE_ERR:
1741 		/* ERR->XXX */
1742 		switch (new_state) {
1743 		case QED_ROCE_QP_STATE_RESET:
1744 			if ((qp->rq.prod != qp->rq.cons) ||
1745 			    (qp->sq.prod != qp->sq.cons)) {
1746 				DP_NOTICE(dev,
1747 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1748 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1749 					  qp->sq.cons);
1750 				status = -EINVAL;
1751 			}
1752 			break;
1753 		default:
1754 			status = -EINVAL;
1755 			break;
1756 		}
1757 		break;
1758 	default:
1759 		status = -EINVAL;
1760 		break;
1761 	}
1762 
1763 	return status;
1764 }
1765 
1766 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1767 		   int attr_mask, struct ib_udata *udata)
1768 {
1769 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1770 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1771 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1772 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1773 	enum ib_qp_state old_qp_state, new_qp_state;
1774 	int rc = 0;
1775 
1776 	DP_DEBUG(dev, QEDR_MSG_QP,
1777 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
1778 		 attr->qp_state);
1779 
1780 	old_qp_state = qedr_get_ibqp_state(qp->state);
1781 	if (attr_mask & IB_QP_STATE)
1782 		new_qp_state = attr->qp_state;
1783 	else
1784 		new_qp_state = old_qp_state;
1785 
1786 	if (!ib_modify_qp_is_ok
1787 	    (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
1788 	     IB_LINK_LAYER_ETHERNET)) {
1789 		DP_ERR(dev,
1790 		       "modify qp: invalid attribute mask=0x%x specified for\n"
1791 		       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1792 		       attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
1793 		       new_qp_state);
1794 		rc = -EINVAL;
1795 		goto err;
1796 	}
1797 
1798 	/* Translate the masks... */
1799 	if (attr_mask & IB_QP_STATE) {
1800 		SET_FIELD(qp_params.modify_flags,
1801 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1802 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1803 	}
1804 
1805 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1806 		qp_params.sqd_async = true;
1807 
1808 	if (attr_mask & IB_QP_PKEY_INDEX) {
1809 		SET_FIELD(qp_params.modify_flags,
1810 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1811 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1812 			rc = -EINVAL;
1813 			goto err;
1814 		}
1815 
1816 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1817 	}
1818 
1819 	if (attr_mask & IB_QP_QKEY)
1820 		qp->qkey = attr->qkey;
1821 
1822 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1823 		SET_FIELD(qp_params.modify_flags,
1824 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1825 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1826 						  IB_ACCESS_REMOTE_READ;
1827 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1828 						   IB_ACCESS_REMOTE_WRITE;
1829 		qp_params.incoming_atomic_en = attr->qp_access_flags &
1830 					       IB_ACCESS_REMOTE_ATOMIC;
1831 	}
1832 
1833 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1834 		if (attr_mask & IB_QP_PATH_MTU) {
1835 			if (attr->path_mtu < IB_MTU_256 ||
1836 			    attr->path_mtu > IB_MTU_4096) {
1837 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1838 				rc = -EINVAL;
1839 				goto err;
1840 			}
1841 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1842 				      ib_mtu_enum_to_int(iboe_get_mtu
1843 							 (dev->ndev->mtu)));
1844 		}
1845 
1846 		if (!qp->mtu) {
1847 			qp->mtu =
1848 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1849 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1850 		}
1851 
1852 		SET_FIELD(qp_params.modify_flags,
1853 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1854 
1855 		qp_params.traffic_class_tos = grh->traffic_class;
1856 		qp_params.flow_label = grh->flow_label;
1857 		qp_params.hop_limit_ttl = grh->hop_limit;
1858 
1859 		qp->sgid_idx = grh->sgid_index;
1860 
1861 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1862 		if (rc) {
1863 			DP_ERR(dev,
1864 			       "modify qp: problems with GID index %d (rc=%d)\n",
1865 			       grh->sgid_index, rc);
1866 			return rc;
1867 		}
1868 
1869 		rc = qedr_get_dmac(dev, &attr->ah_attr,
1870 				   qp_params.remote_mac_addr);
1871 		if (rc)
1872 			return rc;
1873 
1874 		qp_params.use_local_mac = true;
1875 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
1876 
1877 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
1878 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
1879 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
1880 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
1881 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
1882 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
1883 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
1884 			 qp_params.remote_mac_addr);
1885 
1886 		qp_params.mtu = qp->mtu;
1887 		qp_params.lb_indication = false;
1888 	}
1889 
1890 	if (!qp_params.mtu) {
1891 		/* Stay with current MTU */
1892 		if (qp->mtu)
1893 			qp_params.mtu = qp->mtu;
1894 		else
1895 			qp_params.mtu =
1896 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1897 	}
1898 
1899 	if (attr_mask & IB_QP_TIMEOUT) {
1900 		SET_FIELD(qp_params.modify_flags,
1901 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
1902 
1903 		qp_params.ack_timeout = attr->timeout;
1904 		if (attr->timeout) {
1905 			u32 temp;
1906 
1907 			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
1908 			/* FW requires [msec] */
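			/* attr->timeout is an exponent: the value programmed is
			 * 4096ns * 2^timeout converted to msec, e.g.
			 * attr->timeout == 14 yields roughly 67 msec.
			 */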
1909 			qp_params.ack_timeout = temp;
1910 		} else {
1911 			/* Infinite */
1912 			qp_params.ack_timeout = 0;
1913 		}
1914 	}
1915 	if (attr_mask & IB_QP_RETRY_CNT) {
1916 		SET_FIELD(qp_params.modify_flags,
1917 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
1918 		qp_params.retry_cnt = attr->retry_cnt;
1919 	}
1920 
1921 	if (attr_mask & IB_QP_RNR_RETRY) {
1922 		SET_FIELD(qp_params.modify_flags,
1923 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
1924 		qp_params.rnr_retry_cnt = attr->rnr_retry;
1925 	}
1926 
1927 	if (attr_mask & IB_QP_RQ_PSN) {
1928 		SET_FIELD(qp_params.modify_flags,
1929 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
1930 		qp_params.rq_psn = attr->rq_psn;
1931 		qp->rq_psn = attr->rq_psn;
1932 	}
1933 
1934 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1935 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
1936 			rc = -EINVAL;
1937 			DP_ERR(dev,
1938 			       "unsupported max_rd_atomic=%d, supported=%d\n",
1939 			       attr->max_rd_atomic,
1940 			       dev->attr.max_qp_req_rd_atomic_resc);
1941 			goto err;
1942 		}
1943 
1944 		SET_FIELD(qp_params.modify_flags,
1945 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
1946 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
1947 	}
1948 
1949 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1950 		SET_FIELD(qp_params.modify_flags,
1951 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
1952 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
1953 	}
1954 
1955 	if (attr_mask & IB_QP_SQ_PSN) {
1956 		SET_FIELD(qp_params.modify_flags,
1957 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
1958 		qp_params.sq_psn = attr->sq_psn;
1959 		qp->sq_psn = attr->sq_psn;
1960 	}
1961 
1962 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1963 		if (attr->max_dest_rd_atomic >
1964 		    dev->attr.max_qp_resp_rd_atomic_resc) {
1965 			DP_ERR(dev,
1966 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
1967 			       attr->max_dest_rd_atomic,
1968 			       dev->attr.max_qp_resp_rd_atomic_resc);
1969 
1970 			rc = -EINVAL;
1971 			goto err;
1972 		}
1973 
1974 		SET_FIELD(qp_params.modify_flags,
1975 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
1976 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
1977 	}
1978 
1979 	if (attr_mask & IB_QP_DEST_QPN) {
1980 		SET_FIELD(qp_params.modify_flags,
1981 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
1982 
1983 		qp_params.dest_qp = attr->dest_qp_num;
1984 		qp->dest_qp_num = attr->dest_qp_num;
1985 	}
1986 
1987 	if (qp->qp_type != IB_QPT_GSI)
1988 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
1989 					      qp->qed_qp, &qp_params);
1990 
1991 	if (attr_mask & IB_QP_STATE) {
1992 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
1993 			rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
1994 		qp->state = qp_params.new_state;
1995 	}
1996 
1997 err:
1998 	return rc;
1999 }
2000 
2001 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2002 {
2003 	int ib_qp_acc_flags = 0;
2004 
2005 	if (params->incoming_rdma_write_en)
2006 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2007 	if (params->incoming_rdma_read_en)
2008 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2009 	if (params->incoming_atomic_en)
2010 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2011 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2012 	return ib_qp_acc_flags;
2013 }
2014 
2015 int qedr_query_qp(struct ib_qp *ibqp,
2016 		  struct ib_qp_attr *qp_attr,
2017 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2018 {
2019 	struct qed_rdma_query_qp_out_params params;
2020 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2021 	struct qedr_dev *dev = qp->dev;
2022 	int rc = 0;
2023 
2024 	memset(&params, 0, sizeof(params));
2025 
2026 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2027 	if (rc)
2028 		goto err;
2029 
2030 	memset(qp_attr, 0, sizeof(*qp_attr));
2031 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2032 
2033 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2034 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2035 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2036 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2037 	qp_attr->rq_psn = params.rq_psn;
2038 	qp_attr->sq_psn = params.sq_psn;
2039 	qp_attr->dest_qp_num = params.dest_qp;
2040 
2041 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2042 
2043 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2044 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2045 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2046 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2047 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2048 	qp_init_attr->cap = qp_attr->cap;
2049 
2050 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2051 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2052 			params.flow_label, qp->sgid_idx,
2053 			params.hop_limit_ttl, params.traffic_class_tos);
2054 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2055 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2056 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2057 	qp_attr->timeout = params.timeout;
2058 	qp_attr->rnr_retry = params.rnr_retry;
2059 	qp_attr->retry_cnt = params.retry_cnt;
2060 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2061 	qp_attr->pkey_index = params.pkey_index;
2062 	qp_attr->port_num = 1;
2063 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2064 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2065 	qp_attr->alt_pkey_index = 0;
2066 	qp_attr->alt_port_num = 0;
2067 	qp_attr->alt_timeout = 0;
2068 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2069 
2070 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2071 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2072 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2073 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2074 
2075 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2076 		 qp_attr->cap.max_inline_data);
2077 
2078 err:
2079 	return rc;
2080 }
2081 
2082 int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2083 {
2084 	int rc = 0;
2085 
2086 	if (qp->qp_type != IB_QPT_GSI) {
2087 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2088 		if (rc)
2089 			return rc;
2090 	}
2091 
2092 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2093 		qedr_cleanup_user(dev, qp);
2094 	else
2095 		qedr_cleanup_kernel(dev, qp);
2096 
2097 	return 0;
2098 }
2099 
2100 int qedr_destroy_qp(struct ib_qp *ibqp)
2101 {
2102 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2103 	struct qedr_dev *dev = qp->dev;
2104 	struct ib_qp_attr attr;
2105 	int attr_mask = 0;
2106 	int rc = 0;
2107 
2108 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2109 		 qp, qp->qp_type);
2110 
2111 	if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2112 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2113 	    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2114 
2115 		attr.qp_state = IB_QPS_ERR;
2116 		attr_mask |= IB_QP_STATE;
2117 
2118 		/* Change the QP state to ERROR */
2119 		qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2120 	}
2121 
2122 	if (qp->qp_type == IB_QPT_GSI)
2123 		qedr_destroy_gsi_qp(dev);
2124 
2125 	qedr_free_qp_resources(dev, qp);
2126 
2127 	kfree(qp);
2128 
2129 	return rc;
2130 }
2131 
2132 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2133 			     struct ib_udata *udata)
2134 {
2135 	struct qedr_ah *ah;
2136 
2137 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2138 	if (!ah)
2139 		return ERR_PTR(-ENOMEM);
2140 
2141 	ah->attr = *attr;
2142 
2143 	return &ah->ibah;
2144 }
2145 
2146 int qedr_destroy_ah(struct ib_ah *ibah)
2147 {
2148 	struct qedr_ah *ah = get_qedr_ah(ibah);
2149 
2150 	kfree(ah);
2151 	return 0;
2152 }
2153 
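/* Return every PBL owned by this MR info (the active table plus anything on
 * the in-use list) to the free list and release them all.
 */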
2154 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2155 {
2156 	struct qedr_pbl *pbl, *tmp;
2157 
2158 	if (info->pbl_table)
2159 		list_add_tail(&info->pbl_table->list_entry,
2160 			      &info->free_pbl_list);
2161 
2162 	if (!list_empty(&info->inuse_pbl_list))
2163 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2164 
2165 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2166 		list_del(&pbl->list_entry);
2167 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2168 	}
2169 }
2170 
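/* Size the PBL for page_list_len pages and allocate the primary PBL table,
 * plus one spare table that is parked on the free list for later reuse.
 */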
2171 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2172 			size_t page_list_len, bool two_layered)
2173 {
2174 	struct qedr_pbl *tmp;
2175 	int rc;
2176 
2177 	INIT_LIST_HEAD(&info->free_pbl_list);
2178 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2179 
2180 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2181 				  page_list_len, two_layered);
2182 	if (rc)
2183 		goto done;
2184 
2185 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2186 	if (IS_ERR(info->pbl_table)) {
2187 		rc = PTR_ERR(info->pbl_table);
2188 		goto done;
2189 	}
2190 
2191 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2192 		 &info->pbl_table->pa);
2193 
2194 	/* In the usual case we use two PBLs, so we add one to the free
2195 	 * list and allocate another one
2196 	 */
2197 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2198 	if (IS_ERR(tmp)) {
2199 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2200 		goto done;
2201 	}
2202 
2203 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2204 
2205 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2206 
2207 done:
2208 	if (rc)
2209 		free_mr_info(dev, info);
2210 
2211 	return rc;
2212 }
2213 
2214 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2215 			       u64 usr_addr, int acc, struct ib_udata *udata)
2216 {
2217 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2218 	struct qedr_mr *mr;
2219 	struct qedr_pd *pd;
2220 	int rc = -ENOMEM;
2221 
2222 	pd = get_qedr_pd(ibpd);
2223 	DP_DEBUG(dev, QEDR_MSG_MR,
2224 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2225 		 pd->pd_id, start, len, usr_addr, acc);
2226 
2227 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2228 		return ERR_PTR(-EINVAL);
2229 
2230 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2231 	if (!mr)
2232 		return ERR_PTR(rc);
2233 
2234 	mr->type = QEDR_MR_USER;
2235 
2236 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2237 	if (IS_ERR(mr->umem)) {
2238 		rc = -EFAULT;
2239 		goto err0;
2240 	}
2241 
2242 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2243 	if (rc)
2244 		goto err1;
2245 
2246 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2247 			   &mr->info.pbl_info, mr->umem->page_shift);
2248 
2249 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2250 	if (rc) {
2251 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2252 		goto err1;
2253 	}
2254 
2255 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2256 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2257 	mr->hw_mr.key = 0;
2258 	mr->hw_mr.pd = pd->pd_id;
2259 	mr->hw_mr.local_read = 1;
2260 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2261 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2262 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2263 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2264 	mr->hw_mr.mw_bind = false;
2265 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2266 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2267 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2268 	mr->hw_mr.page_size_log = mr->umem->page_shift;
2269 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2270 	mr->hw_mr.length = len;
2271 	mr->hw_mr.vaddr = usr_addr;
2272 	mr->hw_mr.zbva = false;
2273 	mr->hw_mr.phy_mr = false;
2274 	mr->hw_mr.dma_mr = false;
2275 
2276 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2277 	if (rc) {
2278 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2279 		goto err2;
2280 	}
2281 
2282 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2283 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2284 	    mr->hw_mr.remote_atomic)
2285 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2286 
2287 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2288 		 mr->ibmr.lkey);
2289 	return &mr->ibmr;
2290 
2291 err2:
2292 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2293 err1:
2294 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2295 err0:
2296 	kfree(mr);
2297 	return ERR_PTR(rc);
2298 }
2299 
2300 int qedr_dereg_mr(struct ib_mr *ib_mr)
2301 {
2302 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2303 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2304 	int rc = 0;
2305 
2306 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2307 	if (rc)
2308 		return rc;
2309 
2310 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2311 
2312 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2313 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2314 
2315 	/* it could be user registered memory. */
2316 	if (mr->umem)
2317 		ib_umem_release(mr->umem);
2318 
2319 	kfree(mr);
2320 
2321 	return rc;
2322 }
2323 
2324 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2325 				       int max_page_list_len)
2326 {
2327 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2328 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2329 	struct qedr_mr *mr;
2330 	int rc = -ENOMEM;
2331 
2332 	DP_DEBUG(dev, QEDR_MSG_MR,
2333 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2334 		 max_page_list_len);
2335 
2336 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2337 	if (!mr)
2338 		return ERR_PTR(rc);
2339 
2340 	mr->dev = dev;
2341 	mr->type = QEDR_MR_FRMR;
2342 
2343 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2344 	if (rc)
2345 		goto err0;
2346 
2347 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2348 	if (rc) {
2349 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2350 		goto err0;
2351 	}
2352 
2353 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2354 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2355 	mr->hw_mr.key = 0;
2356 	mr->hw_mr.pd = pd->pd_id;
2357 	mr->hw_mr.local_read = 1;
2358 	mr->hw_mr.local_write = 0;
2359 	mr->hw_mr.remote_read = 0;
2360 	mr->hw_mr.remote_write = 0;
2361 	mr->hw_mr.remote_atomic = 0;
2362 	mr->hw_mr.mw_bind = false;
2363 	mr->hw_mr.pbl_ptr = 0;
2364 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2365 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2366 	mr->hw_mr.fbo = 0;
2367 	mr->hw_mr.length = 0;
2368 	mr->hw_mr.vaddr = 0;
2369 	mr->hw_mr.zbva = false;
2370 	mr->hw_mr.phy_mr = true;
2371 	mr->hw_mr.dma_mr = false;
2372 
2373 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2374 	if (rc) {
2375 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2376 		goto err1;
2377 	}
2378 
2379 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2380 	mr->ibmr.rkey = mr->ibmr.lkey;
2381 
2382 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2383 	return mr;
2384 
2385 err1:
2386 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2387 err0:
2388 	kfree(mr);
2389 	return ERR_PTR(rc);
2390 }
2391 
2392 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2393 			    enum ib_mr_type mr_type, u32 max_num_sg)
2394 {
2395 	struct qedr_dev *dev;
2396 	struct qedr_mr *mr;
2397 
2398 	if (mr_type != IB_MR_TYPE_MEM_REG)
2399 		return ERR_PTR(-EINVAL);
2400 
2401 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2402 
2403 	if (IS_ERR(mr))
2404 		return ERR_PTR(-EINVAL);
2405 
2406 	dev = mr->dev;
2407 
2408 	return &mr->ibmr;
2409 }
2410 
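/* ib_sg_to_pages() callback: write one page address into the next free PBE
 * of the MR's page-buffer list.
 */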
2411 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2412 {
2413 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2414 	struct qedr_pbl *pbl_table;
2415 	struct regpair *pbe;
2416 	u32 pbes_in_page;
2417 
2418 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2419 		DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
2420 		return -ENOMEM;
2421 	}
2422 
2423 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2424 		 mr->npages, addr);
2425 
2426 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2427 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2428 	pbe = (struct regpair *)pbl_table->va;
2429 	pbe +=  mr->npages % pbes_in_page;
2430 	pbe->lo = cpu_to_le32((u32)addr);
2431 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2432 
2433 	mr->npages++;
2434 
2435 	return 0;
2436 }
2437 
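/* Move PBLs whose fast-reg work has already completed from the in-use list
 * back to the free list, based on the completed counter (the most recent
 * one is always held back).
 */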
2438 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2439 {
2440 	int work = info->completed - info->completed_handled - 1;
2441 
2442 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2443 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2444 		struct qedr_pbl *pbl;
2445 
2446 		/* Free all the page lists that can be freed (all the ones
2447 		 * that were invalidated), under the assumption that if an FMR
2448 		 * completed successfully, then any invalidate operation posted
2449 		 * before it has completed as well.
2450 		 */
2451 		pbl = list_first_entry(&info->inuse_pbl_list,
2452 				       struct qedr_pbl, list_entry);
2453 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2454 		info->completed_handled++;
2455 	}
2456 }
2457 
2458 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2459 		   int sg_nents, unsigned int *sg_offset)
2460 {
2461 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2462 
2463 	mr->npages = 0;
2464 
2465 	handle_completed_mrs(mr->dev, &mr->info);
2466 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2467 }
2468 
2469 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2470 {
2471 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2472 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2473 	struct qedr_mr *mr;
2474 	int rc;
2475 
2476 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2477 	if (!mr)
2478 		return ERR_PTR(-ENOMEM);
2479 
2480 	mr->type = QEDR_MR_DMA;
2481 
2482 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2483 	if (rc) {
2484 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2485 		goto err1;
2486 	}
2487 
2488 	/* index only, 18 bit long, lkey = itid << 8 | key */
2489 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2490 	mr->hw_mr.pd = pd->pd_id;
2491 	mr->hw_mr.local_read = 1;
2492 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2493 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2494 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2495 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2496 	mr->hw_mr.dma_mr = true;
2497 
2498 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2499 	if (rc) {
2500 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2501 		goto err2;
2502 	}
2503 
2504 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2505 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2506 	    mr->hw_mr.remote_atomic)
2507 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2508 
2509 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2510 	return &mr->ibmr;
2511 
2512 err2:
2513 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2514 err1:
2515 	kfree(mr);
2516 	return ERR_PTR(rc);
2517 }
2518 
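/* The work queue is full when advancing the producer would make it collide
 * with the consumer.
 */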
2519 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2520 {
2521 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2522 }
2523 
2524 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2525 {
2526 	int i, len = 0;
2527 
2528 	for (i = 0; i < num_sge; i++)
2529 		len += sg_list[i].length;
2530 
2531 	return len;
2532 }
2533 
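/* Byte-swap each 64-bit word of a WQE segment; callers use this after
 * copying inline data into the segment in CPU byte order.
 */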
2534 static void swap_wqe_data64(u64 *p)
2535 {
2536 	int i;
2537 
2538 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2539 		*p = cpu_to_be64(cpu_to_le64(*p));
2540 }
2541 
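/* Copy the WR's scatter/gather payload directly into SQ chain elements and
 * set the inline flag bit. Returns the total number of bytes copied, or 0
 * with *bad_wr set if the payload exceeds the inline limit.
 */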
2542 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2543 				       struct qedr_qp *qp, u8 *wqe_size,
2544 				       struct ib_send_wr *wr,
2545 				       struct ib_send_wr **bad_wr, u8 *bits,
2546 				       u8 bit)
2547 {
2548 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2549 	char *seg_prt, *wqe;
2550 	int i, seg_siz;
2551 
2552 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2553 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2554 		*bad_wr = wr;
2555 		return 0;
2556 	}
2557 
2558 	if (!data_size)
2559 		return data_size;
2560 
2561 	*bits |= bit;
2562 
2563 	seg_prt = NULL;
2564 	wqe = NULL;
2565 	seg_siz = 0;
2566 
2567 	/* Copy data inline */
2568 	for (i = 0; i < wr->num_sge; i++) {
2569 		u32 len = wr->sg_list[i].length;
2570 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2571 
2572 		while (len > 0) {
2573 			u32 cur;
2574 
2575 			/* New segment required */
2576 			if (!seg_siz) {
2577 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2578 				seg_prt = wqe;
2579 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2580 				(*wqe_size)++;
2581 			}
2582 
2583 			/* Calculate currently allowed length */
2584 			cur = min_t(u32, len, seg_siz);
2585 			memcpy(seg_prt, src, cur);
2586 
2587 			/* Update segment variables */
2588 			seg_prt += cur;
2589 			seg_siz -= cur;
2590 
2591 			/* Update sge variables */
2592 			src += cur;
2593 			len -= cur;
2594 
2595 			/* Swap fully-completed segments */
2596 			if (!seg_siz)
2597 				swap_wqe_data64((u64 *)wqe);
2598 		}
2599 	}
2600 
2601 	/* Swap the last segment if it was not fully filled */
2602 	if (seg_siz)
2603 		swap_wqe_data64((u64 *)wqe);
2604 
2605 	return data_size;
2606 }
2607 
2608 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
2609 	do {							\
2610 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2611 		(sge)->length = cpu_to_le32(vlength);		\
2612 		(sge)->flags = cpu_to_le32(vflags);		\
2613 	} while (0)
2614 
2615 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
2616 	do {							\
2617 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
2618 		(hdr)->num_sges = num_sge;			\
2619 	} while (0)
2620 
2621 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
2622 	do {							\
2623 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2624 		(sge)->length = cpu_to_le32(vlength);		\
2625 		(sge)->l_key = cpu_to_le32(vlkey);		\
2626 	} while (0)
2627 
2628 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2629 				struct ib_send_wr *wr)
2630 {
2631 	u32 data_size = 0;
2632 	int i;
2633 
2634 	for (i = 0; i < wr->num_sge; i++) {
2635 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2636 
2637 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2638 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2639 		sge->length = cpu_to_le32(wr->sg_list[i].length);
2640 		data_size += wr->sg_list[i].length;
2641 	}
2642 
2643 	if (wqe_size)
2644 		*wqe_size += wr->num_sge;
2645 
2646 	return data_size;
2647 }
2648 
2649 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2650 				     struct qedr_qp *qp,
2651 				     struct rdma_sq_rdma_wqe_1st *rwqe,
2652 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2653 				     struct ib_send_wr *wr,
2654 				     struct ib_send_wr **bad_wr)
2655 {
2656 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2657 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2658 
2659 	if (wr->send_flags & IB_SEND_INLINE &&
2660 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2661 	     wr->opcode == IB_WR_RDMA_WRITE)) {
2662 		u8 flags = 0;
2663 
2664 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2665 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2666 						   bad_wr, &rwqe->flags, flags);
2667 	}
2668 
2669 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2670 }
2671 
2672 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2673 				     struct qedr_qp *qp,
2674 				     struct rdma_sq_send_wqe_1st *swqe,
2675 				     struct rdma_sq_send_wqe_2st *swqe2,
2676 				     struct ib_send_wr *wr,
2677 				     struct ib_send_wr **bad_wr)
2678 {
2679 	memset(swqe2, 0, sizeof(*swqe2));
2680 	if (wr->send_flags & IB_SEND_INLINE) {
2681 		u8 flags = 0;
2682 
2683 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2684 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2685 						   bad_wr, &swqe->flags, flags);
2686 	}
2687 
2688 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2689 }
2690 
2691 static int qedr_prepare_reg(struct qedr_qp *qp,
2692 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2693 			    struct ib_reg_wr *wr)
2694 {
2695 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2696 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2697 
2698 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2699 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2700 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2701 	fwqe1->l_key = wr->key;
2702 
2703 	fwqe2->access_ctrl = 0;
2704 
2705 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2706 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2707 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2708 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2709 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2710 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2711 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2712 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2713 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2714 	fwqe2->fmr_ctrl = 0;
2715 
2716 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2717 		   ilog2(mr->ibmr.page_size) - 12);
2718 
2719 	fwqe2->length_hi = 0;
2720 	fwqe2->length_lo = mr->ibmr.length;
2721 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2722 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2723 
2724 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2725 
2726 	return 0;
2727 }
2728 
2729 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2730 {
2731 	switch (opcode) {
2732 	case IB_WR_RDMA_WRITE:
2733 	case IB_WR_RDMA_WRITE_WITH_IMM:
2734 		return IB_WC_RDMA_WRITE;
2735 	case IB_WR_SEND_WITH_IMM:
2736 	case IB_WR_SEND:
2737 	case IB_WR_SEND_WITH_INV:
2738 		return IB_WC_SEND;
2739 	case IB_WR_RDMA_READ:
2740 		return IB_WC_RDMA_READ;
2741 	case IB_WR_ATOMIC_CMP_AND_SWP:
2742 		return IB_WC_COMP_SWAP;
2743 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2744 		return IB_WC_FETCH_ADD;
2745 	case IB_WR_REG_MR:
2746 		return IB_WC_REG_MR;
2747 	case IB_WR_LOCAL_INV:
2748 		return IB_WC_LOCAL_INV;
2749 	default:
2750 		return IB_WC_SEND;
2751 	}
2752 }
2753 
2754 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2755 {
2756 	int wq_is_full, err_wr, pbl_is_full;
2757 	struct qedr_dev *dev = qp->dev;
2758 
2759 	/* prevent SQ overflow and/or processing of a bad WR */
2760 	err_wr = wr->num_sge > qp->sq.max_sges;
2761 	wq_is_full = qedr_wq_is_full(&qp->sq);
2762 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2763 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2764 	if (wq_is_full || err_wr || pbl_is_full) {
2765 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2766 			DP_ERR(dev,
2767 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2768 			       qp);
2769 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2770 		}
2771 
2772 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2773 			DP_ERR(dev,
2774 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2775 			       qp);
2776 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2777 		}
2778 
2779 		if (pbl_is_full &&
2780 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2781 			DP_ERR(dev,
2782 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2783 			       qp);
2784 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2785 		}
2786 		return false;
2787 	}
2788 	return true;
2789 }
2790 
2791 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2792 		     struct ib_send_wr **bad_wr)
2793 {
2794 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2795 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2796 	struct rdma_sq_atomic_wqe_1st *awqe1;
2797 	struct rdma_sq_atomic_wqe_2nd *awqe2;
2798 	struct rdma_sq_atomic_wqe_3rd *awqe3;
2799 	struct rdma_sq_send_wqe_2st *swqe2;
2800 	struct rdma_sq_local_inv_wqe *iwqe;
2801 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2802 	struct rdma_sq_send_wqe_1st *swqe;
2803 	struct rdma_sq_rdma_wqe_1st *rwqe;
2804 	struct rdma_sq_fmr_wqe_1st *fwqe1;
2805 	struct rdma_sq_common_wqe *wqe;
2806 	u32 length;
2807 	int rc = 0;
2808 	bool comp;
2809 
2810 	if (!qedr_can_post_send(qp, wr)) {
2811 		*bad_wr = wr;
2812 		return -ENOMEM;
2813 	}
2814 
2815 	wqe = qed_chain_produce(&qp->sq.pbl);
2816 	qp->wqe_wr_id[qp->sq.prod].signaled =
2817 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2818 
2819 	wqe->flags = 0;
2820 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2821 		   !!(wr->send_flags & IB_SEND_SOLICITED));
2822 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2823 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2824 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2825 		   !!(wr->send_flags & IB_SEND_FENCE));
2826 	wqe->prev_wqe_size = qp->prev_wqe_size;
2827 
2828 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2829 
2830 	switch (wr->opcode) {
2831 	case IB_WR_SEND_WITH_IMM:
2832 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
2833 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2834 		swqe->wqe_size = 2;
2835 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2836 
2837 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
2838 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2839 						   wr, bad_wr);
2840 		swqe->length = cpu_to_le32(length);
2841 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2842 		qp->prev_wqe_size = swqe->wqe_size;
2843 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2844 		break;
2845 	case IB_WR_SEND:
2846 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
2847 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2848 
2849 		swqe->wqe_size = 2;
2850 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2851 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2852 						   wr, bad_wr);
2853 		swqe->length = cpu_to_le32(length);
2854 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2855 		qp->prev_wqe_size = swqe->wqe_size;
2856 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2857 		break;
2858 	case IB_WR_SEND_WITH_INV:
2859 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
2860 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2861 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2862 		swqe->wqe_size = 2;
2863 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
2864 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2865 						   wr, bad_wr);
2866 		swqe->length = cpu_to_le32(length);
2867 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2868 		qp->prev_wqe_size = swqe->wqe_size;
2869 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2870 		break;
2871 
2872 	case IB_WR_RDMA_WRITE_WITH_IMM:
2873 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
2874 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2875 
2876 		rwqe->wqe_size = 2;
2877 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
2878 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2879 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2880 						   wr, bad_wr);
2881 		rwqe->length = cpu_to_le32(length);
2882 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2883 		qp->prev_wqe_size = rwqe->wqe_size;
2884 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2885 		break;
2886 	case IB_WR_RDMA_WRITE:
2887 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
2888 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2889 
2890 		rwqe->wqe_size = 2;
2891 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2892 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2893 						   wr, bad_wr);
2894 		rwqe->length = cpu_to_le32(length);
2895 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2896 		qp->prev_wqe_size = rwqe->wqe_size;
2897 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2898 		break;
2899 	case IB_WR_RDMA_READ_WITH_INV:
2900 		DP_ERR(dev,
2901 		       "RDMA READ WITH INVALIDATE not supported\n");
2902 		*bad_wr = wr;
2903 		rc = -EINVAL;
2904 		break;
2905 
2906 	case IB_WR_RDMA_READ:
2907 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
2908 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2909 
2910 		rwqe->wqe_size = 2;
2911 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2912 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2913 						   wr, bad_wr);
2914 		rwqe->length = cpu_to_le32(length);
2915 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2916 		qp->prev_wqe_size = rwqe->wqe_size;
2917 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2918 		break;
2919 
2920 	case IB_WR_ATOMIC_CMP_AND_SWP:
2921 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2922 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
2923 		awqe1->wqe_size = 4;
2924 
2925 		awqe2 = qed_chain_produce(&qp->sq.pbl);
2926 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
2927 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
2928 
2929 		awqe3 = qed_chain_produce(&qp->sq.pbl);
2930 
2931 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2932 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
2933 			DMA_REGPAIR_LE(awqe3->swap_data,
2934 				       atomic_wr(wr)->compare_add);
2935 		} else {
2936 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
2937 			DMA_REGPAIR_LE(awqe3->swap_data,
2938 				       atomic_wr(wr)->swap);
2939 			DMA_REGPAIR_LE(awqe3->cmp_data,
2940 				       atomic_wr(wr)->compare_add);
2941 		}
2942 
2943 		qedr_prepare_sq_sges(qp, NULL, wr);
2944 
2945 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
2946 		qp->prev_wqe_size = awqe1->wqe_size;
2947 		break;
2948 
2949 	case IB_WR_LOCAL_INV:
2950 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
2951 		iwqe->wqe_size = 1;
2952 
2953 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
2954 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
2955 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
2956 		qp->prev_wqe_size = iwqe->wqe_size;
2957 		break;
2958 	case IB_WR_REG_MR:
2959 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
2960 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
2961 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
2962 		fwqe1->wqe_size = 2;
2963 
2964 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
2965 		if (rc) {
2966 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
2967 			*bad_wr = wr;
2968 			break;
2969 		}
2970 
2971 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
2972 		qp->prev_wqe_size = fwqe1->wqe_size;
2973 		break;
2974 	default:
2975 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
2976 		rc = -EINVAL;
2977 		*bad_wr = wr;
2978 		break;
2979 	}
2980 
2981 	if (*bad_wr) {
2982 		u16 value;
2983 
2984 		/* Restore prod to its position before
2985 		 * this WR was processed
2986 		 */
2987 		value = le16_to_cpu(qp->sq.db_data.data.value);
2988 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
2989 
2990 		/* Restore prev_wqe_size */
2991 		qp->prev_wqe_size = wqe->prev_wqe_size;
2992 		rc = -EINVAL;
2993 		DP_ERR(dev, "POST SEND FAILED\n");
2994 	}
2995 
2996 	return rc;
2997 }
2998 
2999 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3000 		   struct ib_send_wr **bad_wr)
3001 {
3002 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3003 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3004 	unsigned long flags;
3005 	int rc = 0;
3006 
3007 	*bad_wr = NULL;
3008 
3009 	if (qp->qp_type == IB_QPT_GSI)
3010 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3011 
3012 	spin_lock_irqsave(&qp->q_lock, flags);
3013 
3014 	if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3015 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3016 	    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3017 		spin_unlock_irqrestore(&qp->q_lock, flags);
3018 		*bad_wr = wr;
3019 		DP_DEBUG(dev, QEDR_MSG_CQ,
3020 			 "QP in wrong state! QP icid=0x%x state %d\n",
3021 			 qp->icid, qp->state);
3022 		return -EINVAL;
3023 	}
3024 
3025 	while (wr) {
3026 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3027 		if (rc)
3028 			break;
3029 
3030 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3031 
3032 		qedr_inc_sw_prod(&qp->sq);
3033 
3034 		qp->sq.db_data.data.value++;
3035 
3036 		wr = wr->next;
3037 	}
3038 
3039 	/* Trigger doorbell
3040 	 * If there was a failure in the first WR then it will be triggered in
3041 	 * vain. However, this is not harmful (as long as the producer value is
3042 	 * unchanged). For performance reasons we avoid checking for this
3043 	 * redundant doorbell.
3044 	 */
3045 	wmb();
3046 	writel(qp->sq.db_data.raw, qp->sq.db);
3047 
3048 	/* Make sure write sticks */
3049 	mmiowb();
3050 
3051 	spin_unlock_irqrestore(&qp->q_lock, flags);
3052 
3053 	return rc;
3054 }
3055 
3056 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3057 		   struct ib_recv_wr **bad_wr)
3058 {
3059 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3060 	struct qedr_dev *dev = qp->dev;
3061 	unsigned long flags;
3062 	int status = 0;
3063 
3064 	if (qp->qp_type == IB_QPT_GSI)
3065 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3066 
3067 	spin_lock_irqsave(&qp->q_lock, flags);
3068 
3069 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3070 		spin_unlock_irqrestore(&qp->q_lock, flags);
3071 		*bad_wr = wr;
3072 		return -EINVAL;
3073 	}
3074 
3075 	while (wr) {
3076 		int i;
3077 
3078 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3079 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3080 		    wr->num_sge > qp->rq.max_sges) {
3081 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3082 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3083 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3084 			       qp->rq.max_sges);
3085 			status = -ENOMEM;
3086 			*bad_wr = wr;
3087 			break;
3088 		}
3089 		for (i = 0; i < wr->num_sge; i++) {
3090 			u32 flags = 0;
3091 			struct rdma_rq_sge *rqe =
3092 			    qed_chain_produce(&qp->rq.pbl);
3093 
3094 			/* First one must include the number
3095 			 * of SGEs in the list
3096 			 */
3097 			if (!i)
3098 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3099 					  wr->num_sge);
3100 
3101 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3102 				  wr->sg_list[i].lkey);
3103 
3104 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3105 				   wr->sg_list[i].length, flags);
3106 		}
3107 
3108 		/* Special case of no SGEs. The FW requires between 1 and 4 SGEs,
3109 		 * so in this case we post one SGE with length zero. This is needed
3110 		 * because an RDMA write with immediate consumes an RQ entry.
3111 		 */
3112 		if (!wr->num_sge) {
3113 			u32 flags = 0;
3114 			struct rdma_rq_sge *rqe =
3115 			    qed_chain_produce(&qp->rq.pbl);
3116 
3117 			/* First one must include the number
3118 			 * of SGEs in the list
3119 			 */
3120 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3121 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3122 
3123 			RQ_SGE_SET(rqe, 0, 0, flags);
3124 			i = 1;
3125 		}
3126 
3127 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3128 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3129 
3130 		qedr_inc_sw_prod(&qp->rq);
3131 
3132 		/* Flush all the writes before signalling doorbell */
3133 		wmb();
3134 
3135 		qp->rq.db_data.data.value++;
3136 
3137 		writel(qp->rq.db_data.raw, qp->rq.db);
3138 
3139 		/* Make sure write sticks */
3140 		mmiowb();
3141 
3142 		wr = wr->next;
3143 	}
3144 
3145 	spin_unlock_irqrestore(&qp->q_lock, flags);
3146 
3147 	return status;
3148 }
3149 
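/* A CQE still needs processing when its toggle bit matches the CQ's current
 * toggle value.
 */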
3150 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3151 {
3152 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3153 
3154 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3155 		cq->pbl_toggle;
3156 }
3157 
3158 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3159 {
3160 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3161 	struct qedr_qp *qp;
3162 
3163 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3164 						   resp_cqe->qp_handle.lo,
3165 						   u64);
3166 	return qp;
3167 }
3168 
3169 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3170 {
3171 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3172 
3173 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3174 }
3175 
3176 /* Return latest CQE (needs processing) */
3177 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3178 {
3179 	return cq->latest_cqe;
3180 }
3181 
3182 /* For FMRs we need to increment the completed counter used by the algorithm
3183  * that determines whether a PBL can be freed or not.
3184  * This must be done whether or not the work request was signaled. For that
3185  * purpose we call this function from the condition that checks if a WR
3186  * should be skipped, to make sure we don't miss it (this FMR operation may
3187  * not have been signaled).
3188  */
3189 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3190 {
3191 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3192 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3193 }
3194 
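/* Consume requester-side completions up to hw_cons, filling at most
 * num_entries work completions; unsignaled WRs are skipped unless force is
 * set (e.g. when flushing).
 */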
3195 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3196 		       struct qedr_cq *cq, int num_entries,
3197 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3198 		       int force)
3199 {
3200 	u16 cnt = 0;
3201 
3202 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3203 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3204 			qedr_chk_if_fmr(qp);
3205 			/* skip WC */
3206 			goto next_cqe;
3207 		}
3208 
3209 		/* fill WC */
3210 		wc->status = status;
3211 		wc->vendor_err = 0;
3212 		wc->wc_flags = 0;
3213 		wc->src_qp = qp->id;
3214 		wc->qp = &qp->ibqp;
3215 
3216 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3217 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3218 
3219 		switch (wc->opcode) {
3220 		case IB_WC_RDMA_WRITE:
3221 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3222 			break;
3223 		case IB_WC_COMP_SWAP:
3224 		case IB_WC_FETCH_ADD:
3225 			wc->byte_len = 8;
3226 			break;
3227 		case IB_WC_REG_MR:
3228 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3229 			break;
3230 		case IB_WC_RDMA_READ:
3231 		case IB_WC_SEND:
3232 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3233 			break;
3234 		default:
3235 			break;
3236 		}
3237 
3238 		num_entries--;
3239 		wc++;
3240 		cnt++;
3241 next_cqe:
3242 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3243 			qed_chain_consume(&qp->sq.pbl);
3244 		qedr_inc_sw_cons(&qp->sq);
3245 	}
3246 
3247 	return cnt;
3248 }
3249 
3250 static int qedr_poll_cq_req(struct qedr_dev *dev,
3251 			    struct qedr_qp *qp, struct qedr_cq *cq,
3252 			    int num_entries, struct ib_wc *wc,
3253 			    struct rdma_cqe_requester *req)
3254 {
3255 	int cnt = 0;
3256 
3257 	switch (req->status) {
3258 	case RDMA_CQE_REQ_STS_OK:
3259 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3260 				  IB_WC_SUCCESS, 0);
3261 		break;
3262 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3263 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3264 			DP_ERR(dev,
3265 			       "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3266 			       cq->icid, qp->icid);
3267 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3268 				  IB_WC_WR_FLUSH_ERR, 1);
3269 		break;
3270 	default:
3271 		/* process all WQEs before the consumer */
3272 		qp->state = QED_ROCE_QP_STATE_ERR;
3273 		cnt = process_req(dev, qp, cq, num_entries, wc,
3274 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3275 		wc += cnt;
3276 		/* if we have extra WC fill it with actual error info */
3277 		if (cnt < num_entries) {
3278 			enum ib_wc_status wc_status;
3279 
3280 			switch (req->status) {
3281 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3282 				DP_ERR(dev,
3283 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3284 				       cq->icid, qp->icid);
3285 				wc_status = IB_WC_BAD_RESP_ERR;
3286 				break;
3287 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3288 				DP_ERR(dev,
3289 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3290 				       cq->icid, qp->icid);
3291 				wc_status = IB_WC_LOC_LEN_ERR;
3292 				break;
3293 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3294 				DP_ERR(dev,
3295 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3296 				       cq->icid, qp->icid);
3297 				wc_status = IB_WC_LOC_QP_OP_ERR;
3298 				break;
3299 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3300 				DP_ERR(dev,
3301 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3302 				       cq->icid, qp->icid);
3303 				wc_status = IB_WC_LOC_PROT_ERR;
3304 				break;
3305 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3306 				DP_ERR(dev,
3307 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3308 				       cq->icid, qp->icid);
3309 				wc_status = IB_WC_MW_BIND_ERR;
3310 				break;
3311 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3312 				DP_ERR(dev,
3313 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3314 				       cq->icid, qp->icid);
3315 				wc_status = IB_WC_REM_INV_REQ_ERR;
3316 				break;
3317 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3318 				DP_ERR(dev,
3319 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3320 				       cq->icid, qp->icid);
3321 				wc_status = IB_WC_REM_ACCESS_ERR;
3322 				break;
3323 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3324 				DP_ERR(dev,
3325 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3326 				       cq->icid, qp->icid);
3327 				wc_status = IB_WC_REM_OP_ERR;
3328 				break;
3329 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3330 				DP_ERR(dev,
3331 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3332 				       cq->icid, qp->icid);
3333 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3334 				break;
3335 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3336 				DP_ERR(dev,
3337 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3338 				       cq->icid, qp->icid);
3339 				wc_status = IB_WC_RETRY_EXC_ERR;
3340 				break;
3341 			default:
3342 				DP_ERR(dev,
3343 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3344 				       cq->icid, qp->icid);
3345 				wc_status = IB_WC_GENERAL_ERR;
3346 			}
3347 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3348 					   wc_status, 1);
3349 		}
3350 	}
3351 
3352 	return cnt;
3353 }
3354 
3355 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3356 {
3357 	switch (status) {
3358 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3359 		return IB_WC_LOC_ACCESS_ERR;
3360 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3361 		return IB_WC_LOC_LEN_ERR;
3362 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3363 		return IB_WC_LOC_QP_OP_ERR;
3364 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3365 		return IB_WC_LOC_PROT_ERR;
3366 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3367 		return IB_WC_MW_BIND_ERR;
3368 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3369 		return IB_WC_REM_INV_RD_REQ_ERR;
3370 	case RDMA_CQE_RESP_STS_OK:
3371 		return IB_WC_SUCCESS;
3372 	default:
3373 		return IB_WC_GENERAL_ERR;
3374 	}
3375 }
3376 
3377 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3378 					  struct ib_wc *wc)
3379 {
3380 	wc->status = IB_WC_SUCCESS;
3381 	wc->byte_len = le32_to_cpu(resp->length);
3382 
3383 	if (resp->flags & QEDR_RESP_IMM) {
3384 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3385 		wc->wc_flags |= IB_WC_WITH_IMM;
3386 
3387 		if (resp->flags & QEDR_RESP_RDMA)
3388 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3389 
3390 		if (resp->flags & QEDR_RESP_INV)
3391 			return -EINVAL;
3392 
3393 	} else if (resp->flags & QEDR_RESP_INV) {
3394 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3395 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3396 
3397 		if (resp->flags & QEDR_RESP_RDMA)
3398 			return -EINVAL;
3399 
3400 	} else if (resp->flags & QEDR_RESP_RDMA) {
3401 		return -EINVAL;
3402 	}
3403 
3404 	return 0;
3405 }
3406 
3407 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3408 			       struct qedr_cq *cq, struct ib_wc *wc,
3409 			       struct rdma_cqe_responder *resp, u64 wr_id)
3410 {
3411 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3412 	wc->opcode = IB_WC_RECV;
3413 	wc->wc_flags = 0;
3414 
3415 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3416 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3417 			DP_ERR(dev,
3418 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3419 			       cq, cq->icid, resp->flags);
3420 
3421 	} else {
3422 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3423 		if (wc->status == IB_WC_GENERAL_ERR)
3424 			DP_ERR(dev,
3425 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3426 			       cq, cq->icid, resp->status);
3427 	}
3428 
3429 	/* Fill the rest of the WC */
3430 	wc->vendor_err = 0;
3431 	wc->src_qp = qp->id;
3432 	wc->qp = &qp->ibqp;
3433 	wc->wr_id = wr_id;
3434 }
3435 
3436 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3437 			    struct qedr_cq *cq, struct ib_wc *wc,
3438 			    struct rdma_cqe_responder *resp)
3439 {
3440 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3441 
3442 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3443 
3444 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3445 		qed_chain_consume(&qp->rq.pbl);
3446 	qedr_inc_sw_cons(&qp->rq);
3447 
3448 	return 1;
3449 }
3450 
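/* Flush receive queue entries up to hw_cons, reporting each of them with
 * IB_WC_WR_FLUSH_ERR.
 */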
3451 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3452 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3453 {
3454 	u16 cnt = 0;
3455 
3456 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3457 		/* fill WC */
3458 		wc->status = IB_WC_WR_FLUSH_ERR;
3459 		wc->vendor_err = 0;
3460 		wc->wc_flags = 0;
3461 		wc->src_qp = qp->id;
3462 		wc->byte_len = 0;
3463 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3464 		wc->qp = &qp->ibqp;
3465 		num_entries--;
3466 		wc++;
3467 		cnt++;
3468 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3469 			qed_chain_consume(&qp->rq.pbl);
3470 		qedr_inc_sw_cons(&qp->rq);
3471 	}
3472 
3473 	return cnt;
3474 }
3475 
3476 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3477 				 struct rdma_cqe_responder *resp, int *update)
3478 {
3479 	if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
3480 		consume_cqe(cq);
3481 		*update |= 1;
3482 	}
3483 }
3484 
3485 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3486 			     struct qedr_cq *cq, int num_entries,
3487 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
3488 			     int *update)
3489 {
3490 	int cnt;
3491 
3492 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3493 		cnt = process_resp_flush(qp, cq, num_entries, wc,
3494 					 resp->rq_cons);
3495 		try_consume_resp_cqe(cq, qp, resp, update);
3496 	} else {
3497 		cnt = process_resp_one(dev, qp, cq, wc, resp);
3498 		consume_cqe(cq);
3499 		*update |= 1;
3500 	}
3501 
3502 	return cnt;
3503 }
3504 
3505 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3506 				struct rdma_cqe_requester *req, int *update)
3507 {
3508 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3509 		consume_cqe(cq);
3510 		*update |= 1;
3511 	}
3512 }
3513 
3514 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3515 {
3516 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3517 	struct qedr_cq *cq = get_qedr_cq(ibcq);
3518 	union rdma_cqe *cqe = cq->latest_cqe;
3519 	u32 old_cons, new_cons;
3520 	unsigned long flags;
3521 	int update = 0;
3522 	int done = 0;
3523 
3524 	if (cq->destroyed) {
3525 		DP_ERR(dev,
3526 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3527 		       cq, cq->icid);
3528 		return 0;
3529 	}
3530 
3531 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3532 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3533 
3534 	spin_lock_irqsave(&cq->cq_lock, flags);
3535 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3536 	while (num_entries && is_valid_cqe(cq, cqe)) {
3537 		struct qedr_qp *qp;
3538 		int cnt = 0;
3539 
3540 		/* prevent speculative reads of any field of CQE */
3541 		rmb();
3542 
3543 		qp = cqe_get_qp(cqe);
3544 		if (!qp) {
3545 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3546 			break;
3547 		}
3548 
3549 		wc->qp = &qp->ibqp;
3550 
3551 		switch (cqe_get_type(cqe)) {
3552 		case RDMA_CQE_TYPE_REQUESTER:
3553 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3554 					       &cqe->req);
3555 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
3556 			break;
3557 		case RDMA_CQE_TYPE_RESPONDER_RQ:
3558 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3559 						&cqe->resp, &update);
3560 			break;
3561 		case RDMA_CQE_TYPE_INVALID:
3562 		default:
3563 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
3564 			       cqe_get_type(cqe));
3565 		}
3566 		num_entries -= cnt;
3567 		wc += cnt;
3568 		done += cnt;
3569 
3570 		cqe = get_cqe(cq);
3571 	}
3572 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3573 
3574 	cq->cq_cons += new_cons - old_cons;
3575 
3576 	if (update)
3577 		/* The doorbell notifies about the latest VALID entry,
3578 		 * but the chain already points to the next INVALID one.
3579 		 */
3580 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3581 
3582 	spin_unlock_irqrestore(&cq->cq_lock, flags);
3583 	return done;
3584 }
3585 
3586 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3587 		     u8 port_num,
3588 		     const struct ib_wc *in_wc,
3589 		     const struct ib_grh *in_grh,
3590 		     const struct ib_mad_hdr *mad_hdr,
3591 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
3592 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
3593 {
3594 	struct qedr_dev *dev = get_qedr_dev(ibdev);
3595 
3596 	DP_DEBUG(dev, QEDR_MSG_GSI,
3597 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3598 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3599 		 mad_hdr->class_specific, mad_hdr->class_version,
3600 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3601 	return IB_MAD_RESULT_SUCCESS;
3602 }
3603 
3604 int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
3605 			struct ib_port_immutable *immutable)
3606 {
3607 	struct ib_port_attr attr;
3608 	int err;
3609 
3610 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
3611 				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
3612 
3613 	err = ib_query_port(ibdev, port_num, &attr);
3614 	if (err)
3615 		return err;
3616 
3617 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
3618 	immutable->gid_tbl_len = attr.gid_tbl_len;
3619 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
3620 
3621 	return 0;
3622 }
3623