1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_cm.h"
53 
54 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 
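/* Copy at most udata->outlen bytes so user-space applications built against
 * an older, smaller response struct still work.
 */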
56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
57 					size_t len)
58 {
59 	size_t min_len = min_t(size_t, len, udata->outlen);
60 
61 	return ib_copy_to_udata(udata, src, min_len);
62 }
63 
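/* RoCE exposes a single default P_Key; only the index needs validating. */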
64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
65 {
66 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
67 		return -EINVAL;
68 
69 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
70 	return 0;
71 }
72 
73 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
74 		   union ib_gid *sgid)
75 {
76 	struct qedr_dev *dev = get_qedr_dev(ibdev);
77 	int rc = 0;
78 
79 	if (!rdma_cap_roce_gid_table(ibdev, port))
80 		return -ENODEV;
81 
82 	rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
83 	if (rc == -EAGAIN) {
84 		memcpy(sgid, &zgid, sizeof(*sgid));
85 		return 0;
86 	}
87 
88 	DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
89 		 sgid->global.interface_id, sgid->global.subnet_prefix);
90 
91 	return rc;
92 }
93 
94 int qedr_add_gid(struct ib_device *device, u8 port_num,
95 		 unsigned int index, const union ib_gid *gid,
96 		 const struct ib_gid_attr *attr, void **context)
97 {
98 	if (!rdma_cap_roce_gid_table(device, port_num))
99 		return -EINVAL;
100 
101 	if (port_num > QEDR_MAX_PORT)
102 		return -EINVAL;
103 
104 	if (!context)
105 		return -EINVAL;
106 
107 	return 0;
108 }
109 
110 int qedr_del_gid(struct ib_device *device, u8 port_num,
111 		 unsigned int index, void **context)
112 {
113 	if (!rdma_cap_roce_gid_table(device, port_num))
114 		return -EINVAL;
115 
116 	if (port_num > QEDR_MAX_PORT)
117 		return -EINVAL;
118 
119 	if (!context)
120 		return -EINVAL;
121 
122 	return 0;
123 }
124 
125 int qedr_query_device(struct ib_device *ibdev,
126 		      struct ib_device_attr *attr, struct ib_udata *udata)
127 {
128 	struct qedr_dev *dev = get_qedr_dev(ibdev);
129 	struct qedr_device_attr *qattr = &dev->attr;
130 
131 	if (!dev->rdma_ctx) {
132 		DP_ERR(dev,
133 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
134 		       dev->rdma_ctx);
135 		return -EINVAL;
136 	}
137 
138 	memset(attr, 0, sizeof(*attr));
139 
140 	attr->fw_ver = qattr->fw_ver;
141 	attr->sys_image_guid = qattr->sys_image_guid;
142 	attr->max_mr_size = qattr->max_mr_size;
143 	attr->page_size_cap = qattr->page_size_caps;
144 	attr->vendor_id = qattr->vendor_id;
145 	attr->vendor_part_id = qattr->vendor_part_id;
146 	attr->hw_ver = qattr->hw_ver;
147 	attr->max_qp = qattr->max_qp;
148 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
149 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
150 	    IB_DEVICE_RC_RNR_NAK_GEN |
151 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
152 
153 	attr->max_sge = qattr->max_sge;
154 	attr->max_sge_rd = qattr->max_sge;
155 	attr->max_cq = qattr->max_cq;
156 	attr->max_cqe = qattr->max_cqe;
157 	attr->max_mr = qattr->max_mr;
158 	attr->max_mw = qattr->max_mw;
159 	attr->max_pd = qattr->max_pd;
160 	attr->atomic_cap = dev->atomic_cap;
161 	attr->max_fmr = qattr->max_fmr;
162 	attr->max_map_per_fmr = 16;
163 	attr->max_qp_init_rd_atom =
164 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
165 	attr->max_qp_rd_atom =
166 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
167 		attr->max_qp_init_rd_atom);
168 
169 	attr->max_srq = qattr->max_srq;
170 	attr->max_srq_sge = qattr->max_srq_sge;
171 	attr->max_srq_wr = qattr->max_srq_wr;
172 
173 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
174 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
175 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
176 	attr->max_ah = qattr->max_ah;
177 
178 	return 0;
179 }
180 
181 #define QEDR_SPEED_SDR		(1)
182 #define QEDR_SPEED_DDR		(2)
183 #define QEDR_SPEED_QDR		(4)
184 #define QEDR_SPEED_FDR10	(8)
185 #define QEDR_SPEED_FDR		(16)
186 #define QEDR_SPEED_EDR		(32)
187 
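/* Map an Ethernet link speed (in Mbps) to the closest IB speed/width pair. */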
188 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
189 					    u8 *ib_width)
190 {
191 	switch (speed) {
192 	case 1000:
193 		*ib_speed = QEDR_SPEED_SDR;
194 		*ib_width = IB_WIDTH_1X;
195 		break;
196 	case 10000:
197 		*ib_speed = QEDR_SPEED_QDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 20000:
202 		*ib_speed = QEDR_SPEED_DDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	case 25000:
207 		*ib_speed = QEDR_SPEED_EDR;
208 		*ib_width = IB_WIDTH_1X;
209 		break;
210 
211 	case 40000:
212 		*ib_speed = QEDR_SPEED_QDR;
213 		*ib_width = IB_WIDTH_4X;
214 		break;
215 
216 	case 50000:
217 		*ib_speed = QEDR_SPEED_QDR;
218 		*ib_width = IB_WIDTH_4X;
219 		break;
220 
221 	case 100000:
222 		*ib_speed = QEDR_SPEED_EDR;
223 		*ib_width = IB_WIDTH_4X;
224 		break;
225 
226 	default:
227 		/* Unsupported */
228 		*ib_speed = QEDR_SPEED_SDR;
229 		*ib_width = IB_WIDTH_1X;
230 	}
231 }
232 
233 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
234 {
235 	struct qedr_dev *dev;
236 	struct qed_rdma_port *rdma_port;
237 
238 	dev = get_qedr_dev(ibdev);
239 	if (port > 1) {
240 		DP_ERR(dev, "invalid_port=0x%x\n", port);
241 		return -EINVAL;
242 	}
243 
244 	if (!dev->rdma_ctx) {
245 		DP_ERR(dev, "rdma_ctx is NULL\n");
246 		return -EINVAL;
247 	}
248 
249 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
250 
251 	/* *attr is zeroed by the caller; avoid zeroing it again here */
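	/* phys_state: 5 = LinkUp, 3 = Disabled (IB PortPhysicalState values) */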
252 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
253 		attr->state = IB_PORT_ACTIVE;
254 		attr->phys_state = 5;
255 	} else {
256 		attr->state = IB_PORT_DOWN;
257 		attr->phys_state = 3;
258 	}
259 	attr->max_mtu = IB_MTU_4096;
260 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
261 	attr->lid = 0;
262 	attr->lmc = 0;
263 	attr->sm_lid = 0;
264 	attr->sm_sl = 0;
265 	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
266 	attr->gid_tbl_len = QEDR_MAX_SGID;
267 	attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
268 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
269 	attr->qkey_viol_cntr = 0;
270 	get_link_speed_and_width(rdma_port->link_speed,
271 				 &attr->active_speed, &attr->active_width);
272 	attr->max_msg_sz = rdma_port->max_msg_size;
273 	attr->max_vl_num = 4;
274 
275 	return 0;
276 }
277 
278 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
279 		     struct ib_port_modify *props)
280 {
281 	struct qedr_dev *dev;
282 
283 	dev = get_qedr_dev(ibdev);
284 	if (port > 1) {
285 		DP_ERR(dev, "invalid_port=0x%x\n", port);
286 		return -EINVAL;
287 	}
288 
289 	return 0;
290 }
291 
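/* Record a physical address range that this ucontext may later mmap();
 * qedr_mmap() only remaps ranges that were registered here.
 */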
292 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
293 			 unsigned long len)
294 {
295 	struct qedr_mm *mm;
296 
297 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
298 	if (!mm)
299 		return -ENOMEM;
300 
301 	mm->key.phy_addr = phy_addr;
302 	/* This function might be called with a length which is not a multiple
303 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
304 	 * forces this granularity by increasing the requested size if needed.
305 	 * When qedr_mmap is called, it will search the list with the updated
306 	 * length as a key. To prevent search failures, the length is rounded up
307 	 * in advance to PAGE_SIZE.
308 	 */
309 	mm->key.len = roundup(len, PAGE_SIZE);
310 	INIT_LIST_HEAD(&mm->entry);
311 
312 	mutex_lock(&uctx->mm_list_lock);
313 	list_add(&mm->entry, &uctx->mm_head);
314 	mutex_unlock(&uctx->mm_list_lock);
315 
316 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
317 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
318 		 (unsigned long long)mm->key.phy_addr,
319 		 (unsigned long)mm->key.len, uctx);
320 
321 	return 0;
322 }
323 
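/* Check whether (phy_addr, len) was previously registered for this ucontext. */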
324 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
325 			     unsigned long len)
326 {
327 	bool found = false;
328 	struct qedr_mm *mm;
329 
330 	mutex_lock(&uctx->mm_list_lock);
331 	list_for_each_entry(mm, &uctx->mm_head, entry) {
332 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
333 			continue;
334 
335 		found = true;
336 		break;
337 	}
338 	mutex_unlock(&uctx->mm_list_lock);
339 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
340 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
341 		 phy_addr, len, uctx, found);
342 
343 	return found;
344 }
345 
346 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
347 					struct ib_udata *udata)
348 {
349 	int rc;
350 	struct qedr_ucontext *ctx;
351 	struct qedr_alloc_ucontext_resp uresp;
352 	struct qedr_dev *dev = get_qedr_dev(ibdev);
353 	struct qed_rdma_add_user_out_params oparams;
354 
355 	if (!udata)
356 		return ERR_PTR(-EFAULT);
357 
358 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
359 	if (!ctx)
360 		return ERR_PTR(-ENOMEM);
361 
362 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
363 	if (rc) {
364 		DP_ERR(dev,
365 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size or closing unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
366 		       rc);
367 		goto err;
368 	}
369 
370 	ctx->dpi = oparams.dpi;
371 	ctx->dpi_addr = oparams.dpi_addr;
372 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
373 	ctx->dpi_size = oparams.dpi_size;
374 	INIT_LIST_HEAD(&ctx->mm_head);
375 	mutex_init(&ctx->mm_list_lock);
376 
377 	memset(&uresp, 0, sizeof(uresp));
378 
379 	uresp.dpm_enabled = dev->user_dpm_enabled;
380 	uresp.db_pa = ctx->dpi_phys_addr;
381 	uresp.db_size = ctx->dpi_size;
382 	uresp.max_send_wr = dev->attr.max_sqe;
383 	uresp.max_recv_wr = dev->attr.max_rqe;
384 	uresp.max_srq_wr = dev->attr.max_srq_wr;
385 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
386 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
387 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
388 	uresp.max_cqes = QEDR_MAX_CQES;
389 
390 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
391 	if (rc)
392 		goto err;
393 
394 	ctx->dev = dev;
395 
396 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
397 	if (rc)
398 		goto err;
399 
400 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
401 		 &ctx->ibucontext);
402 	return &ctx->ibucontext;
403 
404 err:
405 	kfree(ctx);
406 	return ERR_PTR(rc);
407 }
408 
409 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
410 {
411 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
412 	struct qedr_mm *mm, *tmp;
413 	int status = 0;
414 
415 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
416 		 uctx);
417 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
418 
419 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
420 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
421 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
422 			 mm->key.phy_addr, mm->key.len, uctx);
423 		list_del(&mm->entry);
424 		kfree(mm);
425 	}
426 
427 	kfree(uctx);
428 	return status;
429 }
430 
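/* Map the doorbell BAR (write-combined, write-only) or chain memory into
 * user space, but only for ranges registered via qedr_add_mmap().
 */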
431 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
432 {
433 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
434 	struct qedr_dev *dev = get_qedr_dev(context->device);
435 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
436 	u64 unmapped_db = dev->db_phys_addr;
437 	unsigned long len = (vma->vm_end - vma->vm_start);
438 	int rc = 0;
439 	bool found;
440 
441 	DP_DEBUG(dev, QEDR_MSG_INIT,
442 		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
443 		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
444 	if (vma->vm_start & (PAGE_SIZE - 1)) {
445 		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
446 		       vma->vm_start);
447 		return -EINVAL;
448 	}
449 
450 	found = qedr_search_mmap(ucontext, vm_page, len);
451 	if (!found) {
452 		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
453 		       vma->vm_pgoff);
454 		return -EINVAL;
455 	}
456 
457 	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
458 
459 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
460 						     dev->db_size))) {
461 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
462 		if (vma->vm_flags & VM_READ) {
463 			DP_ERR(dev, "Trying to map doorbell bar for read\n");
464 			return -EPERM;
465 		}
466 
467 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
468 
469 		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
470 					PAGE_SIZE, vma->vm_page_prot);
471 	} else {
472 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
473 		rc = remap_pfn_range(vma, vma->vm_start,
474 				     vma->vm_pgoff, len, vma->vm_page_prot);
475 	}
476 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
477 	return rc;
478 }
479 
480 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
481 			    struct ib_ucontext *context, struct ib_udata *udata)
482 {
483 	struct qedr_dev *dev = get_qedr_dev(ibdev);
484 	struct qedr_pd *pd;
485 	u16 pd_id;
486 	int rc;
487 
488 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
489 		 (udata && context) ? "User Lib" : "Kernel");
490 
491 	if (!dev->rdma_ctx) {
492 		DP_ERR(dev, "invalid RDMA context\n");
493 		return ERR_PTR(-EINVAL);
494 	}
495 
496 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
497 	if (!pd)
498 		return ERR_PTR(-ENOMEM);
499 
500 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
501 	if (rc)
502 		goto err;
503 
504 	pd->pd_id = pd_id;
505 
506 	if (udata && context) {
507 		struct qedr_alloc_pd_uresp uresp;
508 
509 		uresp.pd_id = pd_id;
510 
511 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
512 		if (rc) {
513 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
514 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
515 			goto err;
516 		}
517 
518 		pd->uctx = get_qedr_ucontext(context);
519 		pd->uctx->pd = pd;
520 	}
521 
522 	return &pd->ibpd;
523 
524 err:
525 	kfree(pd);
526 	return ERR_PTR(rc);
527 }
528 
529 int qedr_dealloc_pd(struct ib_pd *ibpd)
530 {
531 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
532 	struct qedr_pd *pd = get_qedr_pd(ibpd);
533 
534 	if (!pd) {
535 		pr_err("Invalid PD received in dealloc_pd\n");
536 		return -EINVAL;
537 	}
538 
539 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
540 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
541 
542 	kfree(pd);
543 
544 	return 0;
545 }
546 
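/* Free the DMA-coherent pages of a page buffer list (PBL) and its table. */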
547 static void qedr_free_pbl(struct qedr_dev *dev,
548 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
549 {
550 	struct pci_dev *pdev = dev->pdev;
551 	int i;
552 
553 	for (i = 0; i < pbl_info->num_pbls; i++) {
554 		if (!pbl[i].va)
555 			continue;
556 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
557 				  pbl[i].va, pbl[i].pa);
558 	}
559 
560 	kfree(pbl);
561 }
562 
563 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
564 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
565 
566 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
567 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
568 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
569 
570 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
571 					   struct qedr_pbl_info *pbl_info,
572 					   gfp_t flags)
573 {
574 	struct pci_dev *pdev = dev->pdev;
575 	struct qedr_pbl *pbl_table;
576 	dma_addr_t *pbl_main_tbl;
577 	dma_addr_t pa;
578 	void *va;
579 	int i;
580 
581 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
582 	if (!pbl_table)
583 		return ERR_PTR(-ENOMEM);
584 
585 	for (i = 0; i < pbl_info->num_pbls; i++) {
586 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
587 					&pa, flags);
588 		if (!va)
589 			goto err;
590 
591 		memset(va, 0, pbl_info->pbl_size);
592 		pbl_table[i].va = va;
593 		pbl_table[i].pa = pa;
594 	}
595 
596 	/* Two-layer PBLs: if we have more than one PBL, initialize the first
597 	 * one with the physical addresses of all the others.
598 	 */
599 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
600 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
601 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
602 
603 	return pbl_table;
604 
605 err:
606 	for (i--; i >= 0; i--)
607 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
608 				  pbl_table[i].va, pbl_table[i].pa);
609 
610 	qedr_free_pbl(dev, pbl_info, pbl_table);
611 
612 	return ERR_PTR(-ENOMEM);
613 }
614 
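/* Decide between a one-layer and a two-layer PBL and compute the PBL page
 * size and the number of PBL pages needed for @num_pbes page entries.
 */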
615 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
616 				struct qedr_pbl_info *pbl_info,
617 				u32 num_pbes, int two_layer_capable)
618 {
619 	u32 pbl_capacity;
620 	u32 pbl_size;
621 	u32 num_pbls;
622 
623 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
624 		if (num_pbes > MAX_PBES_TWO_LAYER) {
625 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
626 			       num_pbes);
627 			return -EINVAL;
628 		}
629 
630 		/* calculate required pbl page size */
631 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
632 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
633 			       NUM_PBES_ON_PAGE(pbl_size);
634 
635 		while (pbl_capacity < num_pbes) {
636 			pbl_size *= 2;
637 			pbl_capacity = pbl_size / sizeof(u64);
638 			pbl_capacity = pbl_capacity * pbl_capacity;
639 		}
640 
641 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
642 		num_pbls++;	/* One extra for layer 0, which points to the other PBLs */
643 		pbl_info->two_layered = true;
644 	} else {
645 		/* One layered PBL */
646 		num_pbls = 1;
647 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
648 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
649 		pbl_info->two_layered = false;
650 	}
651 
652 	pbl_info->num_pbls = num_pbls;
653 	pbl_info->pbl_size = pbl_size;
654 	pbl_info->num_pbes = num_pbes;
655 
656 	DP_DEBUG(dev, QEDR_MSG_MR,
657 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
658 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
659 
660 	return 0;
661 }
662 
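/* Walk the umem scatterlist and write one little-endian PBE per firmware
 * page (firmware pages may be smaller than the umem page size).
 */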
663 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
664 			       struct qedr_pbl *pbl,
665 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
666 {
667 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
668 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
669 	struct qedr_pbl *pbl_tbl;
670 	struct scatterlist *sg;
671 	struct regpair *pbe;
672 	u64 pg_addr;
673 	int entry;
674 
675 	if (!pbl_info->num_pbes)
676 		return;
677 
678 	/* If we have a two-layered PBL, the first PBL points to the rest of
679 	 * the PBLs and the first entry lies in the second PBL of the table.
680 	 */
681 	if (pbl_info->two_layered)
682 		pbl_tbl = &pbl[1];
683 	else
684 		pbl_tbl = pbl;
685 
686 	pbe = (struct regpair *)pbl_tbl->va;
687 	if (!pbe) {
688 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
689 		return;
690 	}
691 
692 	pbe_cnt = 0;
693 
694 	shift = umem->page_shift;
695 
696 	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
697 
698 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
699 		pages = sg_dma_len(sg) >> shift;
700 		pg_addr = sg_dma_address(sg);
701 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
702 			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
703 				pbe->lo = cpu_to_le32(pg_addr);
704 				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
705 
706 				pg_addr += BIT(pg_shift);
707 				pbe_cnt++;
708 				total_num_pbes++;
709 				pbe++;
710 
711 				if (total_num_pbes == pbl_info->num_pbes)
712 					return;
713 
714 				/* If the given pbl is full storing the pbes,
715 				 * move to next pbl.
716 				 */
717 				if (pbe_cnt ==
718 				    (pbl_info->pbl_size / sizeof(u64))) {
719 					pbl_tbl++;
720 					pbe = (struct regpair *)pbl_tbl->va;
721 					pbe_cnt = 0;
722 				}
723 
724 				fw_pg_cnt++;
725 			}
726 		}
727 	}
728 }
729 
730 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
731 			      struct qedr_cq *cq, struct ib_udata *udata)
732 {
733 	struct qedr_create_cq_uresp uresp;
734 	int rc;
735 
736 	memset(&uresp, 0, sizeof(uresp));
737 
738 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
739 	uresp.icid = cq->icid;
740 
741 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
742 	if (rc)
743 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
744 
745 	return rc;
746 }
747 
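/* Advance the CQ consumer; flip the expected toggle bit when wrapping past
 * the last CQE in the chain.
 */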
748 static void consume_cqe(struct qedr_cq *cq)
749 {
750 	if (cq->latest_cqe == cq->toggle_cqe)
751 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
752 
753 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
754 }
755 
756 static inline int qedr_align_cq_entries(int entries)
757 {
758 	u64 size, aligned_size;
759 
760 	/* We allocate an extra entry that we don't report to the FW. */
761 	size = (entries + 1) * QEDR_CQE_SIZE;
762 	aligned_size = ALIGN(size, PAGE_SIZE);
763 
764 	return aligned_size / QEDR_CQE_SIZE;
765 }
766 
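/* Pin the user buffer, then build a PBL that describes it in firmware page
 * size units and store the result in @q.
 */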
767 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
768 				       struct qedr_dev *dev,
769 				       struct qedr_userq *q,
770 				       u64 buf_addr, size_t buf_len,
771 				       int access, int dmasync)
772 {
773 	u32 fw_pages;
774 	int rc;
775 
776 	q->buf_addr = buf_addr;
777 	q->buf_len = buf_len;
778 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
779 	if (IS_ERR(q->umem)) {
780 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
781 		       PTR_ERR(q->umem));
782 		return PTR_ERR(q->umem);
783 	}
784 
785 	fw_pages = ib_umem_page_count(q->umem) <<
786 	    (q->umem->page_shift - FW_PAGE_SHIFT);
787 
788 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
789 	if (rc)
790 		goto err0;
791 
792 	q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
793 	if (IS_ERR(q->pbl_tbl)) {
794 		rc = PTR_ERR(q->pbl_tbl);
795 		goto err0;
796 	}
797 
798 	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
799 			   FW_PAGE_SHIFT);
800 
801 	return 0;
802 
803 err0:
804 	ib_umem_release(q->umem);
805 
806 	return rc;
807 }
808 
809 static inline void qedr_init_cq_params(struct qedr_cq *cq,
810 				       struct qedr_ucontext *ctx,
811 				       struct qedr_dev *dev, int vector,
812 				       int chain_entries, int page_cnt,
813 				       u64 pbl_ptr,
814 				       struct qed_rdma_create_cq_in_params
815 				       *params)
816 {
817 	memset(params, 0, sizeof(*params));
818 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
819 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
820 	params->cnq_id = vector;
821 	params->cq_size = chain_entries - 1;
822 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
823 	params->pbl_num_pages = page_cnt;
824 	params->pbl_ptr = pbl_ptr;
825 	params->pbl_two_level = 0;
826 }
827 
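/* Publish the new consumer index and arm flags to the CQ doorbell. */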
828 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
829 {
830 	/* Flush data before signalling doorbell */
831 	wmb();
832 	cq->db.data.agg_flags = flags;
833 	cq->db.data.value = cpu_to_le32(cons);
834 	writeq(cq->db.raw, cq->db_addr);
835 
836 	/* Make sure write would stick */
837 	mmiowb();
838 }
839 
840 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
841 {
842 	struct qedr_cq *cq = get_qedr_cq(ibcq);
843 	unsigned long sflags;
844 	struct qedr_dev *dev;
845 
846 	dev = get_qedr_dev(ibcq->device);
847 
848 	if (cq->destroyed) {
849 		DP_ERR(dev,
850 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
851 		       cq, cq->icid);
852 		return -EINVAL;
853 	}
854 
855 
856 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
857 		return 0;
858 
859 	spin_lock_irqsave(&cq->cq_lock, sflags);
860 
861 	cq->arm_flags = 0;
862 
863 	if (flags & IB_CQ_SOLICITED)
864 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
865 
866 	if (flags & IB_CQ_NEXT_COMP)
867 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
868 
869 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
870 
871 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
872 
873 	return 0;
874 }
875 
876 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
877 			     const struct ib_cq_init_attr *attr,
878 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
879 {
880 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
881 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
882 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
883 	struct qedr_dev *dev = get_qedr_dev(ibdev);
884 	struct qed_rdma_create_cq_in_params params;
885 	struct qedr_create_cq_ureq ureq;
886 	int vector = attr->comp_vector;
887 	int entries = attr->cqe;
888 	struct qedr_cq *cq;
889 	int chain_entries;
890 	int page_cnt;
891 	u64 pbl_ptr;
892 	u16 icid;
893 	int rc;
894 
895 	DP_DEBUG(dev, QEDR_MSG_INIT,
896 		 "create_cq: called from %s. entries=%d, vector=%d\n",
897 		 udata ? "User Lib" : "Kernel", entries, vector);
898 
899 	if (entries > QEDR_MAX_CQES) {
900 		DP_ERR(dev,
901 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
902 		       entries, QEDR_MAX_CQES);
903 		return ERR_PTR(-EINVAL);
904 	}
905 
906 	chain_entries = qedr_align_cq_entries(entries);
907 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
908 
909 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
910 	if (!cq)
911 		return ERR_PTR(-ENOMEM);
912 
913 	if (udata) {
914 		memset(&ureq, 0, sizeof(ureq));
915 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
916 			DP_ERR(dev,
917 			       "create cq: problem copying data from user space\n");
918 			goto err0;
919 		}
920 
921 		if (!ureq.len) {
922 			DP_ERR(dev,
923 			       "create cq: cannot create a cq with 0 entries\n");
924 			goto err0;
925 		}
926 
927 		cq->cq_type = QEDR_CQ_TYPE_USER;
928 
929 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
930 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
931 		if (rc)
932 			goto err0;
933 
934 		pbl_ptr = cq->q.pbl_tbl->pa;
935 		page_cnt = cq->q.pbl_info.num_pbes;
936 
937 		cq->ibcq.cqe = chain_entries;
938 	} else {
939 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
940 
941 		rc = dev->ops->common->chain_alloc(dev->cdev,
942 						   QED_CHAIN_USE_TO_CONSUME,
943 						   QED_CHAIN_MODE_PBL,
944 						   QED_CHAIN_CNT_TYPE_U32,
945 						   chain_entries,
946 						   sizeof(union rdma_cqe),
947 						   &cq->pbl, NULL);
948 		if (rc)
949 			goto err1;
950 
951 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
952 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
953 		cq->ibcq.cqe = cq->pbl.capacity;
954 	}
955 
956 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
957 			    pbl_ptr, &params);
958 
959 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
960 	if (rc)
961 		goto err2;
962 
963 	cq->icid = icid;
964 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
965 	spin_lock_init(&cq->cq_lock);
966 
967 	if (ib_ctx) {
968 		rc = qedr_copy_cq_uresp(dev, cq, udata);
969 		if (rc)
970 			goto err3;
971 	} else {
972 		/* Generate doorbell address. */
973 		cq->db_addr = dev->db_addr +
974 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
975 		cq->db.data.icid = cq->icid;
976 		cq->db.data.params = DB_AGG_CMD_SET <<
977 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
978 
979 		/* Point to the very last element; once we pass it, we toggle */
980 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
981 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
982 		cq->latest_cqe = NULL;
983 		consume_cqe(cq);
984 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
985 	}
986 
987 	DP_DEBUG(dev, QEDR_MSG_CQ,
988 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
989 		 cq->icid, cq, params.cq_size);
990 
991 	return &cq->ibcq;
992 
993 err3:
994 	destroy_iparams.icid = cq->icid;
995 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
996 				  &destroy_oparams);
997 err2:
998 	if (udata)
999 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1000 	else
1001 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1002 err1:
1003 	if (udata)
1004 		ib_umem_release(cq->q.umem);
1005 err0:
1006 	kfree(cq);
1007 	return ERR_PTR(-EINVAL);
1008 }
1009 
1010 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1011 {
1012 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1013 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1014 
1015 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1016 
1017 	return 0;
1018 }
1019 
1020 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1021 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1022 
1023 int qedr_destroy_cq(struct ib_cq *ibcq)
1024 {
1025 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1026 	struct qed_rdma_destroy_cq_out_params oparams;
1027 	struct qed_rdma_destroy_cq_in_params iparams;
1028 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1029 	int iter;
1030 	int rc;
1031 
1032 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1033 
1034 	cq->destroyed = 1;
1035 
1036 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1037 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1038 		goto done;
1039 
1040 	iparams.icid = cq->icid;
1041 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1042 	if (rc)
1043 		return rc;
1044 
1045 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1046 
1047 	if (ibcq->uobject && ibcq->uobject->context) {
1048 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1049 		ib_umem_release(cq->q.umem);
1050 	}
1051 
1052 	/* We don't want the IRQ handler to handle a non-existing CQ, so we
1053 	 * wait until all CNQ interrupts, if any, are received. This will always
1054 	 * happen and will always happen very fast. If not, then a serious error
1055 	 * has occurred. That is why we can use a long delay.
1056 	 * We spin for a short time so we don't lose time on context switching
1057 	 * in case all the completions are handled in that span. Otherwise
1058 	 * we sleep for a while and check again. Since the CNQ may be
1059 	 * associated with (only) the current CPU we use msleep to allow the
1060 	 * current CPU to be freed.
1061 	 * The CNQ notification is increased in qedr_irq_handler().
1062 	 */
1063 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1064 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1065 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1066 		iter--;
1067 	}
1068 
1069 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1070 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1071 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1072 		iter--;
1073 	}
1074 
1075 	if (oparams.num_cq_notif != cq->cnq_notif)
1076 		goto err;
1077 
1078 	/* Note that we don't need to have explicit code to wait for the
1079 	 * completion of the event handler because it is invoked from the EQ.
1080 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1081 	 * be certain that there's no event handler in process.
1082 	 */
1083 done:
1084 	cq->sig = ~cq->sig;
1085 
1086 	kfree(cq);
1087 
1088 	return 0;
1089 
1090 err:
1091 	DP_ERR(dev,
1092 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1093 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1094 
1095 	return -EINVAL;
1096 }
1097 
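/* Resolve the SGID referenced by the AH into source/destination GIDs, VLAN
 * and RoCE mode (v1, v2/IPv4 or v2/IPv6) as expected by the firmware.
 */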
1098 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1099 					  struct ib_qp_attr *attr,
1100 					  int attr_mask,
1101 					  struct qed_rdma_modify_qp_in_params
1102 					  *qp_params)
1103 {
1104 	enum rdma_network_type nw_type;
1105 	struct ib_gid_attr gid_attr;
1106 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1107 	union ib_gid gid;
1108 	u32 ipv4_addr;
1109 	int rc = 0;
1110 	int i;
1111 
1112 	rc = ib_get_cached_gid(ibqp->device,
1113 			       rdma_ah_get_port_num(&attr->ah_attr),
1114 			       grh->sgid_index, &gid, &gid_attr);
1115 	if (rc)
1116 		return rc;
1117 
1118 	if (!memcmp(&gid, &zgid, sizeof(gid)))
1119 		return -ENOENT;
1120 
1121 	if (gid_attr.ndev) {
1122 		qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1123 
1124 		dev_put(gid_attr.ndev);
1125 		nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1126 		switch (nw_type) {
1127 		case RDMA_NETWORK_IPV6:
1128 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1129 			       sizeof(qp_params->sgid));
1130 			memcpy(&qp_params->dgid.bytes[0],
1131 			       &grh->dgid,
1132 			       sizeof(qp_params->dgid));
1133 			qp_params->roce_mode = ROCE_V2_IPV6;
1134 			SET_FIELD(qp_params->modify_flags,
1135 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1136 			break;
1137 		case RDMA_NETWORK_IB:
1138 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1139 			       sizeof(qp_params->sgid));
1140 			memcpy(&qp_params->dgid.bytes[0],
1141 			       &grh->dgid,
1142 			       sizeof(qp_params->dgid));
1143 			qp_params->roce_mode = ROCE_V1;
1144 			break;
1145 		case RDMA_NETWORK_IPV4:
1146 			memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1147 			memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1148 			ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1149 			qp_params->sgid.ipv4_addr = ipv4_addr;
1150 			ipv4_addr =
1151 			    qedr_get_ipv4_from_gid(grh->dgid.raw);
1152 			qp_params->dgid.ipv4_addr = ipv4_addr;
1153 			SET_FIELD(qp_params->modify_flags,
1154 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1155 			qp_params->roce_mode = ROCE_V2_IPV4;
1156 			break;
1157 		}
1158 	}
1159 
1160 	for (i = 0; i < 4; i++) {
1161 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1162 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1163 	}
1164 
1165 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1166 		qp_params->vlan_id = 0;
1167 
1168 	return 0;
1169 }
1170 
1171 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1172 			       struct ib_qp_init_attr *attrs)
1173 {
1174 	struct qedr_device_attr *qattr = &dev->attr;
1175 
1176 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1177 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1178 		DP_DEBUG(dev, QEDR_MSG_QP,
1179 			 "create qp: unsupported qp type=0x%x requested\n",
1180 			 attrs->qp_type);
1181 		return -EINVAL;
1182 	}
1183 
1184 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1185 		DP_ERR(dev,
1186 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1187 		       attrs->cap.max_send_wr, qattr->max_sqe);
1188 		return -EINVAL;
1189 	}
1190 
1191 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1192 		DP_ERR(dev,
1193 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1194 		       attrs->cap.max_inline_data, qattr->max_inline);
1195 		return -EINVAL;
1196 	}
1197 
1198 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1199 		DP_ERR(dev,
1200 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1201 		       attrs->cap.max_send_sge, qattr->max_sge);
1202 		return -EINVAL;
1203 	}
1204 
1205 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1206 		DP_ERR(dev,
1207 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1208 		       attrs->cap.max_recv_sge, qattr->max_sge);
1209 		return -EINVAL;
1210 	}
1211 
1212 	/* Unprivileged user space cannot create special QP */
1213 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1214 		DP_ERR(dev,
1215 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1216 		       attrs->qp_type);
1217 		return -EINVAL;
1218 	}
1219 
1220 	return 0;
1221 }
1222 
1223 static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
1224 			       struct qedr_qp *qp)
1225 {
1226 	uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1227 	uresp->rq_icid = qp->icid;
1228 }
1229 
1230 static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
1231 			       struct qedr_qp *qp)
1232 {
1233 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1234 	uresp->sq_icid = qp->icid + 1;
1235 }
1236 
1237 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1238 			      struct qedr_qp *qp, struct ib_udata *udata)
1239 {
1240 	struct qedr_create_qp_uresp uresp;
1241 	int rc;
1242 
1243 	memset(&uresp, 0, sizeof(uresp));
1244 	qedr_copy_sq_uresp(&uresp, qp);
1245 	qedr_copy_rq_uresp(&uresp, qp);
1246 
1247 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1248 	uresp.qp_id = qp->qp_id;
1249 
1250 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1251 	if (rc)
1252 		DP_ERR(dev,
1253 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1254 		       qp->icid);
1255 
1256 	return rc;
1257 }
1258 
1259 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1260 				      struct qedr_qp *qp,
1261 				      struct qedr_pd *pd,
1262 				      struct ib_qp_init_attr *attrs)
1263 {
1264 	spin_lock_init(&qp->q_lock);
1265 	qp->pd = pd;
1266 	qp->qp_type = attrs->qp_type;
1267 	qp->max_inline_data = attrs->cap.max_inline_data;
1268 	qp->sq.max_sges = attrs->cap.max_send_sge;
1269 	qp->state = QED_ROCE_QP_STATE_RESET;
1270 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1271 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1272 	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1273 	qp->dev = dev;
1274 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1275 
1276 	DP_DEBUG(dev, QEDR_MSG_QP,
1277 		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1278 		 qp->rq.max_sges, qp->rq_cq->icid);
1279 	DP_DEBUG(dev, QEDR_MSG_QP,
1280 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1281 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1282 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1283 	DP_DEBUG(dev, QEDR_MSG_QP,
1284 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1285 		 qp->sq.max_sges, qp->sq_cq->icid);
1286 }
1287 
1288 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1289 {
1290 	qp->sq.db = dev->db_addr +
1291 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1292 	qp->sq.db_data.data.icid = qp->icid + 1;
1293 	qp->rq.db = dev->db_addr +
1294 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1295 	qp->rq.db_data.data.icid = qp->icid;
1296 }
1297 
1298 static inline void
1299 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1300 			      struct qedr_pd *pd,
1301 			      struct qedr_qp *qp,
1302 			      struct ib_qp_init_attr *attrs,
1303 			      bool fmr_and_reserved_lkey,
1304 			      struct qed_rdma_create_qp_in_params *params)
1305 {
1306 	/* QP handle to be written in an async event */
1307 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1308 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1309 
1310 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1311 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1312 	params->pd = pd->pd_id;
1313 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1314 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1315 	params->stats_queue = 0;
1316 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1317 	params->srq_id = 0;
1318 	params->use_srq = false;
1319 }
1320 
1321 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1322 {
1323 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1324 		 "qp=%p. "
1325 		 "sq_addr=0x%llx, "
1326 		 "sq_len=%zd, "
1327 		 "rq_addr=0x%llx, "
1328 		 "rq_len=%zd"
1329 		 "\n",
1330 		 qp,
1331 		 qp->usq.buf_addr,
1332 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1333 }
1334 
1335 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1336 {
1337 	if (qp->usq.umem)
1338 		ib_umem_release(qp->usq.umem);
1339 	qp->usq.umem = NULL;
1340 
1341 	if (qp->urq.umem)
1342 		ib_umem_release(qp->urq.umem);
1343 	qp->urq.umem = NULL;
1344 }
1345 
1346 static int qedr_create_user_qp(struct qedr_dev *dev,
1347 			       struct qedr_qp *qp,
1348 			       struct ib_pd *ibpd,
1349 			       struct ib_udata *udata,
1350 			       struct ib_qp_init_attr *attrs)
1351 {
1352 	struct qed_rdma_create_qp_in_params in_params;
1353 	struct qed_rdma_create_qp_out_params out_params;
1354 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1355 	struct ib_ucontext *ib_ctx = NULL;
1356 	struct qedr_ucontext *ctx = NULL;
1357 	struct qedr_create_qp_ureq ureq;
1358 	int rc = -EINVAL;
1359 
1360 	ib_ctx = ibpd->uobject->context;
1361 	ctx = get_qedr_ucontext(ib_ctx);
1362 
1363 	memset(&ureq, 0, sizeof(ureq));
1364 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1365 	if (rc) {
1366 		DP_ERR(dev, "Problem copying data from user space\n");
1367 		return rc;
1368 	}
1369 
1370 	/* SQ - read access only (0), dma sync not required (0) */
1371 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1372 				  ureq.sq_len, 0, 0);
1373 	if (rc)
1374 		return rc;
1375 
1376 	/* RQ - read access only (0), dma sync not required (0) */
1377 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1378 				  ureq.rq_len, 0, 0);
1379 
1380 	if (rc)
1381 		return rc;
1382 
1383 	memset(&in_params, 0, sizeof(in_params));
1384 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1385 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1386 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1387 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1388 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1389 	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1390 	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1391 
1392 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1393 					      &in_params, &out_params);
1394 
1395 	if (!qp->qed_qp) {
1396 		rc = -ENOMEM;
1397 		goto err1;
1398 	}
1399 
1400 	qp->qp_id = out_params.qp_id;
1401 	qp->icid = out_params.icid;
1402 
1403 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1404 	if (rc)
1405 		goto err;
1406 
1407 	qedr_qp_user_print(dev, qp);
1408 
1409 	return 0;
1410 err:
1411 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1412 	if (rc)
1413 		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1414 
1415 err1:
1416 	qedr_cleanup_user(dev, qp);
1417 	return rc;
1418 }
1419 
1420 static int
1421 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1422 			   struct qedr_qp *qp,
1423 			   struct qed_rdma_create_qp_in_params *in_params,
1424 			   u32 n_sq_elems, u32 n_rq_elems)
1425 {
1426 	struct qed_rdma_create_qp_out_params out_params;
1427 	int rc;
1428 
1429 	rc = dev->ops->common->chain_alloc(dev->cdev,
1430 					   QED_CHAIN_USE_TO_PRODUCE,
1431 					   QED_CHAIN_MODE_PBL,
1432 					   QED_CHAIN_CNT_TYPE_U32,
1433 					   n_sq_elems,
1434 					   QEDR_SQE_ELEMENT_SIZE,
1435 					   &qp->sq.pbl, NULL);
1436 
1437 	if (rc)
1438 		return rc;
1439 
1440 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1441 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1442 
1443 	rc = dev->ops->common->chain_alloc(dev->cdev,
1444 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1445 					   QED_CHAIN_MODE_PBL,
1446 					   QED_CHAIN_CNT_TYPE_U32,
1447 					   n_rq_elems,
1448 					   QEDR_RQE_ELEMENT_SIZE,
1449 					   &qp->rq.pbl, NULL);
1450 	if (rc)
1451 		return rc;
1452 
1453 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1454 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1455 
1456 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1457 					      in_params, &out_params);
1458 
1459 	if (!qp->qed_qp)
1460 		return -EINVAL;
1461 
1462 	qp->qp_id = out_params.qp_id;
1463 	qp->icid = out_params.icid;
1464 
1465 	qedr_set_roce_db_info(dev, qp);
1466 
1467 	return 0;
1468 }
1469 
1470 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1471 {
1472 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1473 	kfree(qp->wqe_wr_id);
1474 
1475 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1476 	kfree(qp->rqe_wr_id);
1477 }
1478 
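/* Allocate SQ/RQ shadow (wr_id) arrays and qed chains for a kernel QP, then
 * create the QP object in the firmware.
 */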
1479 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1480 				 struct qedr_qp *qp,
1481 				 struct ib_pd *ibpd,
1482 				 struct ib_qp_init_attr *attrs)
1483 {
1484 	struct qed_rdma_create_qp_in_params in_params;
1485 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1486 	int rc = -EINVAL;
1487 	u32 n_rq_elems;
1488 	u32 n_sq_elems;
1489 	u32 n_sq_entries;
1490 
1491 	memset(&in_params, 0, sizeof(in_params));
1492 
1493 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1494 	 * the ring. The ring should allow at least a single WR, even if the
1495 	 * user requested none, due to allocation issues.
1496 	 * We should add an extra WR since the prod and cons indices of
1497 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1498 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1499 	 * double the number of entries due to an iSER issue that pushes far
1500 	 * more WRs than indicated. If we decline its ib_post_send() then we
1501 	 * get error prints in dmesg that we'd like to avoid.
1502 	 */
1503 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1504 			      dev->attr.max_sqe);
1505 
1506 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1507 				GFP_KERNEL);
1508 	if (!qp->wqe_wr_id) {
1509 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1510 		return -ENOMEM;
1511 	}
1512 
1513 	/* QP handle to be written in CQE */
1514 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1515 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1516 
1517 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1518 	 * the ring. The ring should allow at least a single WR, even if the
1519 	 * user requested none, due to allocation issues.
1520 	 */
1521 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1522 
1523 	/* Allocate driver internal RQ array */
1524 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1525 				GFP_KERNEL);
1526 	if (!qp->rqe_wr_id) {
1527 		DP_ERR(dev,
1528 		       "create qp: failed RQ shadow memory allocation\n");
1529 		kfree(qp->wqe_wr_id);
1530 		return -ENOMEM;
1531 	}
1532 
1533 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1534 
1535 	n_sq_entries = attrs->cap.max_send_wr;
1536 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1537 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1538 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1539 
1540 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1541 
1542 	rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1543 					n_sq_elems, n_rq_elems);
1544 	if (rc)
1545 		qedr_cleanup_kernel(dev, qp);
1546 
1547 	return rc;
1548 }
1549 
1550 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1551 			     struct ib_qp_init_attr *attrs,
1552 			     struct ib_udata *udata)
1553 {
1554 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1555 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1556 	struct qedr_qp *qp;
1557 	struct ib_qp *ibqp;
1558 	int rc = 0;
1559 
1560 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1561 		 udata ? "user library" : "kernel", pd);
1562 
1563 	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1564 	if (rc)
1565 		return ERR_PTR(rc);
1566 
1567 	if (attrs->srq)
1568 		return ERR_PTR(-EINVAL);
1569 
1570 	DP_DEBUG(dev, QEDR_MSG_QP,
1571 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1572 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1573 		 get_qedr_cq(attrs->send_cq),
1574 		 get_qedr_cq(attrs->send_cq)->icid,
1575 		 get_qedr_cq(attrs->recv_cq),
1576 		 get_qedr_cq(attrs->recv_cq)->icid);
1577 
1578 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1579 	if (!qp) {
1580 		DP_ERR(dev, "create qp: failed allocating memory\n");
1581 		return ERR_PTR(-ENOMEM);
1582 	}
1583 
1584 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1585 
1586 	if (attrs->qp_type == IB_QPT_GSI) {
1587 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1588 		if (IS_ERR(ibqp))
1589 			kfree(qp);
1590 		return ibqp;
1591 	}
1592 
1593 	if (udata)
1594 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1595 	else
1596 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1597 
1598 	if (rc)
1599 		goto err;
1600 
1601 	qp->ibqp.qp_num = qp->qp_id;
1602 
1603 	return &qp->ibqp;
1604 
1605 err:
1606 	kfree(qp);
1607 
1608 	return ERR_PTR(rc);
1609 }
1610 
1611 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1612 {
1613 	switch (qp_state) {
1614 	case QED_ROCE_QP_STATE_RESET:
1615 		return IB_QPS_RESET;
1616 	case QED_ROCE_QP_STATE_INIT:
1617 		return IB_QPS_INIT;
1618 	case QED_ROCE_QP_STATE_RTR:
1619 		return IB_QPS_RTR;
1620 	case QED_ROCE_QP_STATE_RTS:
1621 		return IB_QPS_RTS;
1622 	case QED_ROCE_QP_STATE_SQD:
1623 		return IB_QPS_SQD;
1624 	case QED_ROCE_QP_STATE_ERR:
1625 		return IB_QPS_ERR;
1626 	case QED_ROCE_QP_STATE_SQE:
1627 		return IB_QPS_SQE;
1628 	}
1629 	return IB_QPS_ERR;
1630 }
1631 
1632 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1633 					enum ib_qp_state qp_state)
1634 {
1635 	switch (qp_state) {
1636 	case IB_QPS_RESET:
1637 		return QED_ROCE_QP_STATE_RESET;
1638 	case IB_QPS_INIT:
1639 		return QED_ROCE_QP_STATE_INIT;
1640 	case IB_QPS_RTR:
1641 		return QED_ROCE_QP_STATE_RTR;
1642 	case IB_QPS_RTS:
1643 		return QED_ROCE_QP_STATE_RTS;
1644 	case IB_QPS_SQD:
1645 		return QED_ROCE_QP_STATE_SQD;
1646 	case IB_QPS_ERR:
1647 		return QED_ROCE_QP_STATE_ERR;
1648 	default:
1649 		return QED_ROCE_QP_STATE_ERR;
1650 	}
1651 }
1652 
1653 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1654 {
1655 	qed_chain_reset(&qph->pbl);
1656 	qph->prod = 0;
1657 	qph->cons = 0;
1658 	qph->wqe_cons = 0;
1659 	qph->db_data.data.value = cpu_to_le16(0);
1660 }
1661 
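/* Validate the requested QP state transition and do the driver-side work it
 * implies (e.g. ring the RQ doorbell on the INIT->RTR transition).
 */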
1662 static int qedr_update_qp_state(struct qedr_dev *dev,
1663 				struct qedr_qp *qp,
1664 				enum qed_roce_qp_state new_state)
1665 {
1666 	int status = 0;
1667 
1668 	if (new_state == qp->state)
1669 		return 0;
1670 
1671 	switch (qp->state) {
1672 	case QED_ROCE_QP_STATE_RESET:
1673 		switch (new_state) {
1674 		case QED_ROCE_QP_STATE_INIT:
1675 			qp->prev_wqe_size = 0;
1676 			qedr_reset_qp_hwq_info(&qp->sq);
1677 			qedr_reset_qp_hwq_info(&qp->rq);
1678 			break;
1679 		default:
1680 			status = -EINVAL;
1681 			break;
1682 		}
1683 		break;
1684 	case QED_ROCE_QP_STATE_INIT:
1685 		switch (new_state) {
1686 		case QED_ROCE_QP_STATE_RTR:
1687 			/* Update doorbell (in case post_recv was
1688 			 * done before move to RTR)
1689 			 */
1690 			wmb();
1691 			writel(qp->rq.db_data.raw, qp->rq.db);
1692 			/* Make sure write takes effect */
1693 			mmiowb();
1694 			break;
1695 		case QED_ROCE_QP_STATE_ERR:
1696 			break;
1697 		default:
1698 			/* Invalid state change. */
1699 			status = -EINVAL;
1700 			break;
1701 		}
1702 		break;
1703 	case QED_ROCE_QP_STATE_RTR:
1704 		/* RTR->XXX */
1705 		switch (new_state) {
1706 		case QED_ROCE_QP_STATE_RTS:
1707 			break;
1708 		case QED_ROCE_QP_STATE_ERR:
1709 			break;
1710 		default:
1711 			/* Invalid state change. */
1712 			status = -EINVAL;
1713 			break;
1714 		}
1715 		break;
1716 	case QED_ROCE_QP_STATE_RTS:
1717 		/* RTS->XXX */
1718 		switch (new_state) {
1719 		case QED_ROCE_QP_STATE_SQD:
1720 			break;
1721 		case QED_ROCE_QP_STATE_ERR:
1722 			break;
1723 		default:
1724 			/* Invalid state change. */
1725 			status = -EINVAL;
1726 			break;
1727 		}
1728 		break;
1729 	case QED_ROCE_QP_STATE_SQD:
1730 		/* SQD->XXX */
1731 		switch (new_state) {
1732 		case QED_ROCE_QP_STATE_RTS:
1733 		case QED_ROCE_QP_STATE_ERR:
1734 			break;
1735 		default:
1736 			/* Invalid state change. */
1737 			status = -EINVAL;
1738 			break;
1739 		}
1740 		break;
1741 	case QED_ROCE_QP_STATE_ERR:
1742 		/* ERR->XXX */
1743 		switch (new_state) {
1744 		case QED_ROCE_QP_STATE_RESET:
1745 			if ((qp->rq.prod != qp->rq.cons) ||
1746 			    (qp->sq.prod != qp->sq.cons)) {
1747 				DP_NOTICE(dev,
1748 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1749 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1750 					  qp->sq.cons);
1751 				status = -EINVAL;
1752 			}
1753 			break;
1754 		default:
1755 			status = -EINVAL;
1756 			break;
1757 		}
1758 		break;
1759 	default:
1760 		status = -EINVAL;
1761 		break;
1762 	}
1763 
1764 	return status;
1765 }
1766 
1767 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1768 		   int attr_mask, struct ib_udata *udata)
1769 {
1770 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1771 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1772 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1773 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1774 	enum ib_qp_state old_qp_state, new_qp_state;
1775 	int rc = 0;
1776 
1777 	DP_DEBUG(dev, QEDR_MSG_QP,
1778 		 "modify qp: qp %p attr_mask=0x%x, state=%d\n", qp, attr_mask,
1779 		 attr->qp_state);
1780 
1781 	old_qp_state = qedr_get_ibqp_state(qp->state);
1782 	if (attr_mask & IB_QP_STATE)
1783 		new_qp_state = attr->qp_state;
1784 	else
1785 		new_qp_state = old_qp_state;
1786 
1787 	if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state, ibqp->qp_type,
1788 				attr_mask, IB_LINK_LAYER_ETHERNET)) {
1790 		DP_ERR(dev,
1791 		       "modify qp: invalid attribute mask=0x%x specified for\n"
1792 		       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1793 		       attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
1794 		       new_qp_state);
1795 		rc = -EINVAL;
1796 		goto err;
1797 	}
1798 
1799 	/* Translate the masks... */
1800 	if (attr_mask & IB_QP_STATE) {
1801 		SET_FIELD(qp_params.modify_flags,
1802 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1803 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1804 	}
1805 
1806 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1807 		qp_params.sqd_async = true;
1808 
1809 	if (attr_mask & IB_QP_PKEY_INDEX) {
1810 		SET_FIELD(qp_params.modify_flags,
1811 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1812 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1813 			rc = -EINVAL;
1814 			goto err;
1815 		}
1816 
1817 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1818 	}
1819 
1820 	if (attr_mask & IB_QP_QKEY)
1821 		qp->qkey = attr->qkey;
1822 
1823 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1824 		SET_FIELD(qp_params.modify_flags,
1825 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1826 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1827 						  IB_ACCESS_REMOTE_READ;
1828 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1829 						   IB_ACCESS_REMOTE_WRITE;
1830 		qp_params.incoming_atomic_en = attr->qp_access_flags &
1831 					       IB_ACCESS_REMOTE_ATOMIC;
1832 	}
1833 
1834 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1835 		if (attr_mask & IB_QP_PATH_MTU) {
1836 			if (attr->path_mtu < IB_MTU_256 ||
1837 			    attr->path_mtu > IB_MTU_4096) {
1838 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1839 				rc = -EINVAL;
1840 				goto err;
1841 			}
1842 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1843 				      ib_mtu_enum_to_int(iboe_get_mtu
1844 							 (dev->ndev->mtu)));
1845 		}
1846 
1847 		if (!qp->mtu) {
1848 			qp->mtu =
1849 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1850 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1851 		}
1852 
1853 		SET_FIELD(qp_params.modify_flags,
1854 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1855 
1856 		qp_params.traffic_class_tos = grh->traffic_class;
1857 		qp_params.flow_label = grh->flow_label;
1858 		qp_params.hop_limit_ttl = grh->hop_limit;
1859 
1860 		qp->sgid_idx = grh->sgid_index;
1861 
1862 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1863 		if (rc) {
1864 			DP_ERR(dev,
1865 			       "modify qp: problems with GID index %d (rc=%d)\n",
1866 			       grh->sgid_index, rc);
1867 			return rc;
1868 		}
1869 
1870 		rc = qedr_get_dmac(dev, &attr->ah_attr,
1871 				   qp_params.remote_mac_addr);
1872 		if (rc)
1873 			return rc;
1874 
1875 		qp_params.use_local_mac = true;
1876 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
1877 
1878 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
1879 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
1880 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
1881 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
1882 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
1883 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
1884 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
1885 			 qp_params.remote_mac_addr);
1886 
1887 		qp_params.mtu = qp->mtu;
1888 		qp_params.lb_indication = false;
1889 	}
1890 
1891 	if (!qp_params.mtu) {
1892 		/* Stay with current MTU */
1893 		if (qp->mtu)
1894 			qp_params.mtu = qp->mtu;
1895 		else
1896 			qp_params.mtu =
1897 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1898 	}
1899 
1900 	if (attr_mask & IB_QP_TIMEOUT) {
1901 		SET_FIELD(qp_params.modify_flags,
1902 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
1903 
1904 		qp_params.ack_timeout = attr->timeout;
1905 		if (attr->timeout) {
1906 			u32 temp;
1907 
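			/* Per the IB spec the ack timeout is
			 * 4.096 usec * 2^attr->timeout, so e.g.
			 * attr->timeout == 14 works out to roughly 67 msec
			 * after the conversion below.
			 */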
1908 			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
1909 			/* FW requires [msec] */
1910 			qp_params.ack_timeout = temp;
1911 		} else {
1912 			/* Infinite */
1913 			qp_params.ack_timeout = 0;
1914 		}
1915 	}
1916 	if (attr_mask & IB_QP_RETRY_CNT) {
1917 		SET_FIELD(qp_params.modify_flags,
1918 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
1919 		qp_params.retry_cnt = attr->retry_cnt;
1920 	}
1921 
1922 	if (attr_mask & IB_QP_RNR_RETRY) {
1923 		SET_FIELD(qp_params.modify_flags,
1924 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
1925 		qp_params.rnr_retry_cnt = attr->rnr_retry;
1926 	}
1927 
1928 	if (attr_mask & IB_QP_RQ_PSN) {
1929 		SET_FIELD(qp_params.modify_flags,
1930 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
1931 		qp_params.rq_psn = attr->rq_psn;
1932 		qp->rq_psn = attr->rq_psn;
1933 	}
1934 
1935 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1936 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
1937 			rc = -EINVAL;
1938 			DP_ERR(dev,
1939 			       "unsupported max_rd_atomic=%d, supported=%d\n",
1940 			       attr->max_rd_atomic,
1941 			       dev->attr.max_qp_req_rd_atomic_resc);
1942 			goto err;
1943 		}
1944 
1945 		SET_FIELD(qp_params.modify_flags,
1946 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
1947 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
1948 	}
1949 
1950 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1951 		SET_FIELD(qp_params.modify_flags,
1952 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
1953 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
1954 	}
1955 
1956 	if (attr_mask & IB_QP_SQ_PSN) {
1957 		SET_FIELD(qp_params.modify_flags,
1958 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
1959 		qp_params.sq_psn = attr->sq_psn;
1960 		qp->sq_psn = attr->sq_psn;
1961 	}
1962 
1963 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1964 		if (attr->max_dest_rd_atomic >
1965 		    dev->attr.max_qp_resp_rd_atomic_resc) {
1966 			DP_ERR(dev,
1967 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
1968 			       attr->max_dest_rd_atomic,
1969 			       dev->attr.max_qp_resp_rd_atomic_resc);
1970 
1971 			rc = -EINVAL;
1972 			goto err;
1973 		}
1974 
1975 		SET_FIELD(qp_params.modify_flags,
1976 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
1977 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
1978 	}
1979 
1980 	if (attr_mask & IB_QP_DEST_QPN) {
1981 		SET_FIELD(qp_params.modify_flags,
1982 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
1983 
1984 		qp_params.dest_qp = attr->dest_qp_num;
1985 		qp->dest_qp_num = attr->dest_qp_num;
1986 	}
1987 
1988 	if (qp->qp_type != IB_QPT_GSI)
1989 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
1990 					      qp->qed_qp, &qp_params);
1991 
1992 	if (attr_mask & IB_QP_STATE) {
1993 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
1994 			rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
1995 		qp->state = qp_params.new_state;
1996 	}
1997 
1998 err:
1999 	return rc;
2000 }
2001 
2002 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2003 {
2004 	int ib_qp_acc_flags = 0;
2005 
2006 	if (params->incoming_rdma_write_en)
2007 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2008 	if (params->incoming_rdma_read_en)
2009 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2010 	if (params->incoming_atomic_en)
2011 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2012 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2013 	return ib_qp_acc_flags;
2014 }
2015 
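/* Query the current QP attributes from the qed layer and translate them into
 * the ib_qp_attr / ib_qp_init_attr layout expected by the IB core. Fields the
 * device does not track (alternate path, SL, static rate) are reported as
 * zero.
 */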
2016 int qedr_query_qp(struct ib_qp *ibqp,
2017 		  struct ib_qp_attr *qp_attr,
2018 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2019 {
2020 	struct qed_rdma_query_qp_out_params params;
2021 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2022 	struct qedr_dev *dev = qp->dev;
2023 	int rc = 0;
2024 
2025 	memset(&params, 0, sizeof(params));
2026 
2027 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2028 	if (rc)
2029 		goto err;
2030 
2031 	memset(qp_attr, 0, sizeof(*qp_attr));
2032 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2033 
2034 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2035 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2036 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2037 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2038 	qp_attr->rq_psn = params.rq_psn;
2039 	qp_attr->sq_psn = params.sq_psn;
2040 	qp_attr->dest_qp_num = params.dest_qp;
2041 
2042 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2043 
2044 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2045 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2046 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2047 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2048 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2049 	qp_init_attr->cap = qp_attr->cap;
2050 
2051 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2052 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2053 			params.flow_label, qp->sgid_idx,
2054 			params.hop_limit_ttl, params.traffic_class_tos);
2055 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2056 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2057 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2058 	qp_attr->timeout = params.timeout;
2059 	qp_attr->rnr_retry = params.rnr_retry;
2060 	qp_attr->retry_cnt = params.retry_cnt;
2061 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2062 	qp_attr->pkey_index = params.pkey_index;
2063 	qp_attr->port_num = 1;
2064 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2065 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2066 	qp_attr->alt_pkey_index = 0;
2067 	qp_attr->alt_port_num = 0;
2068 	qp_attr->alt_timeout = 0;
2069 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2070 
2071 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2072 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2073 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2074 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2075 
2076 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2077 		 qp_attr->cap.max_inline_data);
2078 
2079 err:
2080 	return rc;
2081 }
2082 
2083 int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2084 {
2085 	int rc = 0;
2086 
2087 	if (qp->qp_type != IB_QPT_GSI) {
2088 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2089 		if (rc)
2090 			return rc;
2091 	}
2092 
2093 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2094 		qedr_cleanup_user(dev, qp);
2095 	else
2096 		qedr_cleanup_kernel(dev, qp);
2097 
2098 	return 0;
2099 }
2100 
2101 int qedr_destroy_qp(struct ib_qp *ibqp)
2102 {
2103 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2104 	struct qedr_dev *dev = qp->dev;
2105 	struct ib_qp_attr attr;
2106 	int attr_mask = 0;
2107 	int rc = 0;
2108 
2109 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2110 		 qp, qp->qp_type);
2111 
2112 	if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2113 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2114 	    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2115 
2116 		attr.qp_state = IB_QPS_ERR;
2117 		attr_mask |= IB_QP_STATE;
2118 
2119 		/* Change the QP state to ERROR */
2120 		qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2121 	}
2122 
2123 	if (qp->qp_type == IB_QPT_GSI)
2124 		qedr_destroy_gsi_qp(dev);
2125 
2126 	qedr_free_qp_resources(dev, qp);
2127 
2128 	kfree(qp);
2129 
2130 	return rc;
2131 }
2132 
2133 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2134 			     struct ib_udata *udata)
2135 {
2136 	struct qedr_ah *ah;
2137 
2138 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2139 	if (!ah)
2140 		return ERR_PTR(-ENOMEM);
2141 
2142 	ah->attr = *attr;
2143 
2144 	return &ah->ibah;
2145 }
2146 
2147 int qedr_destroy_ah(struct ib_ah *ibah)
2148 {
2149 	struct qedr_ah *ah = get_qedr_ah(ibah);
2150 
2151 	kfree(ah);
2152 	return 0;
2153 }
2154 
2155 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2156 {
2157 	struct qedr_pbl *pbl, *tmp;
2158 
2159 	if (info->pbl_table)
2160 		list_add_tail(&info->pbl_table->list_entry,
2161 			      &info->free_pbl_list);
2162 
2163 	if (!list_empty(&info->inuse_pbl_list))
2164 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2165 
2166 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2167 		list_del(&pbl->list_entry);
2168 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2169 	}
2170 }
2171 
2172 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2173 			size_t page_list_len, bool two_layered)
2174 {
2175 	struct qedr_pbl *tmp;
2176 	int rc;
2177 
2178 	INIT_LIST_HEAD(&info->free_pbl_list);
2179 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2180 
2181 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2182 				  page_list_len, two_layered);
2183 	if (rc)
2184 		goto done;
2185 
2186 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2187 	if (IS_ERR(info->pbl_table)) {
2188 		rc = PTR_ERR(info->pbl_table);
2189 		goto done;
2190 	}
2191 
2192 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2193 		 &info->pbl_table->pa);
2194 
2195 	/* In the usual case we use 2 PBLs, so we add one to the free
2196 	 * list and allocate another one.
2197 	 */
2198 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2199 	if (IS_ERR(tmp)) {
2200 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2201 		goto done;
2202 	}
2203 
2204 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2205 
2206 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2207 
2208 done:
2209 	if (rc)
2210 		free_mr_info(dev, info);
2211 
2212 	return rc;
2213 }
2214 
2215 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2216 			       u64 usr_addr, int acc, struct ib_udata *udata)
2217 {
2218 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2219 	struct qedr_mr *mr;
2220 	struct qedr_pd *pd;
2221 	int rc = -ENOMEM;
2222 
2223 	pd = get_qedr_pd(ibpd);
2224 	DP_DEBUG(dev, QEDR_MSG_MR,
2225 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2226 		 pd->pd_id, start, len, usr_addr, acc);
2227 
2228 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2229 		return ERR_PTR(-EINVAL);
2230 
2231 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2232 	if (!mr)
2233 		return ERR_PTR(rc);
2234 
2235 	mr->type = QEDR_MR_USER;
2236 
2237 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2238 	if (IS_ERR(mr->umem)) {
2239 		rc = -EFAULT;
2240 		goto err0;
2241 	}
2242 
2243 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2244 	if (rc)
2245 		goto err1;
2246 
2247 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2248 			   &mr->info.pbl_info, mr->umem->page_shift);
2249 
2250 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2251 	if (rc) {
2252 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2253 		goto err1;
2254 	}
2255 
2256 	/* Index only, 18 bit long, lkey = itid << 8 | key */
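	/* For example, an itid of 0x123 with key 0 produces lkey 0x12300:
	 * the low byte is the consumer key, the upper bits the TID index.
	 */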
2257 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2258 	mr->hw_mr.key = 0;
2259 	mr->hw_mr.pd = pd->pd_id;
2260 	mr->hw_mr.local_read = 1;
2261 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2262 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2263 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2264 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2265 	mr->hw_mr.mw_bind = false;
2266 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2267 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2268 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2269 	mr->hw_mr.page_size_log = mr->umem->page_shift;
2270 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2271 	mr->hw_mr.length = len;
2272 	mr->hw_mr.vaddr = usr_addr;
2273 	mr->hw_mr.zbva = false;
2274 	mr->hw_mr.phy_mr = false;
2275 	mr->hw_mr.dma_mr = false;
2276 
2277 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2278 	if (rc) {
2279 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2280 		goto err2;
2281 	}
2282 
2283 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2284 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2285 	    mr->hw_mr.remote_atomic)
2286 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2287 
2288 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2289 		 mr->ibmr.lkey);
2290 	return &mr->ibmr;
2291 
2292 err2:
2293 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2294 err1:
2295 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2296 err0:
2297 	kfree(mr);
2298 	return ERR_PTR(rc);
2299 }
2300 
2301 int qedr_dereg_mr(struct ib_mr *ib_mr)
2302 {
2303 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2304 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2305 	int rc = 0;
2306 
2307 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2308 	if (rc)
2309 		return rc;
2310 
2311 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2312 
2313 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2314 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2315 
2316 	/* It could be user-registered memory. */
2317 	if (mr->umem)
2318 		ib_umem_release(mr->umem);
2319 
2320 	kfree(mr);
2321 
2322 	return rc;
2323 }
2324 
2325 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2326 				       int max_page_list_len)
2327 {
2328 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2329 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2330 	struct qedr_mr *mr;
2331 	int rc = -ENOMEM;
2332 
2333 	DP_DEBUG(dev, QEDR_MSG_MR,
2334 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2335 		 max_page_list_len);
2336 
2337 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2338 	if (!mr)
2339 		return ERR_PTR(rc);
2340 
2341 	mr->dev = dev;
2342 	mr->type = QEDR_MR_FRMR;
2343 
2344 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2345 	if (rc)
2346 		goto err0;
2347 
2348 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2349 	if (rc) {
2350 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2351 		goto err0;
2352 	}
2353 
2354 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2355 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2356 	mr->hw_mr.key = 0;
2357 	mr->hw_mr.pd = pd->pd_id;
2358 	mr->hw_mr.local_read = 1;
2359 	mr->hw_mr.local_write = 0;
2360 	mr->hw_mr.remote_read = 0;
2361 	mr->hw_mr.remote_write = 0;
2362 	mr->hw_mr.remote_atomic = 0;
2363 	mr->hw_mr.mw_bind = false;
2364 	mr->hw_mr.pbl_ptr = 0;
2365 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2366 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2367 	mr->hw_mr.fbo = 0;
2368 	mr->hw_mr.length = 0;
2369 	mr->hw_mr.vaddr = 0;
2370 	mr->hw_mr.zbva = false;
2371 	mr->hw_mr.phy_mr = true;
2372 	mr->hw_mr.dma_mr = false;
2373 
2374 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2375 	if (rc) {
2376 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2377 		goto err1;
2378 	}
2379 
2380 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2381 	mr->ibmr.rkey = mr->ibmr.lkey;
2382 
2383 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2384 	return mr;
2385 
2386 err1:
2387 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2388 err0:
2389 	kfree(mr);
2390 	return ERR_PTR(rc);
2391 }
2392 
2393 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2394 			    enum ib_mr_type mr_type, u32 max_num_sg)
2395 {
2396 	struct qedr_dev *dev;
2397 	struct qedr_mr *mr;
2398 
2399 	if (mr_type != IB_MR_TYPE_MEM_REG)
2400 		return ERR_PTR(-EINVAL);
2401 
2402 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2403 
2404 	if (IS_ERR(mr))
2405 		return ERR_PTR(-EINVAL);
2406 
2407 	dev = mr->dev;
2408 
2409 	return &mr->ibmr;
2410 }
2411 
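/* Callback for ib_sg_to_pages(): place a single page address into the PBL
 * structure. With pbl_size bytes per PBL and 8 bytes per PBE, page number
 * 'npages' lands in PBL (npages / pbes_in_page) at offset
 * (npages % pbes_in_page); e.g. with a 4 KB PBL (512 PBEs), page #600 goes
 * into the second PBL at entry 88.
 */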
2412 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2413 {
2414 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2415 	struct qedr_pbl *pbl_table;
2416 	struct regpair *pbe;
2417 	u32 pbes_in_page;
2418 
2419 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2420 		DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
2421 		return -ENOMEM;
2422 	}
2423 
2424 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2425 		 mr->npages, addr);
2426 
2427 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2428 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2429 	pbe = (struct regpair *)pbl_table->va;
2430 	pbe +=  mr->npages % pbes_in_page;
2431 	pbe->lo = cpu_to_le32((u32)addr);
2432 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2433 
2434 	mr->npages++;
2435 
2436 	return 0;
2437 }
2438 
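/* Recycle PBLs whose FMR cycle has finished: completions move entries from
 * the in-use list back to the free list. Note that 'work' deliberately lags
 * one behind 'completed', presumably so the PBL of the most recent
 * registration is never reclaimed while it may still be in flight.
 */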
2439 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2440 {
2441 	int work = info->completed - info->completed_handled - 1;
2442 
2443 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2444 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2445 		struct qedr_pbl *pbl;
2446 
2447 		/* Free all the page lists that can be freed (i.e. all the
2448 		 * ones that were invalidated), under the assumption that if
2449 		 * an FMR completed successfully, then any invalidate
2450 		 * operation posted before it has also completed.
2451 		 */
2452 		pbl = list_first_entry(&info->inuse_pbl_list,
2453 				       struct qedr_pbl, list_entry);
2454 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2455 		info->completed_handled++;
2456 	}
2457 }
2458 
2459 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2460 		   int sg_nents, unsigned int *sg_offset)
2461 {
2462 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2463 
2464 	mr->npages = 0;
2465 
2466 	handle_completed_mrs(mr->dev, &mr->info);
2467 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2468 }
2469 
2470 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2471 {
2472 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2473 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2474 	struct qedr_mr *mr;
2475 	int rc;
2476 
2477 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2478 	if (!mr)
2479 		return ERR_PTR(-ENOMEM);
2480 
2481 	mr->type = QEDR_MR_DMA;
2482 
2483 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2484 	if (rc) {
2485 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2486 		goto err1;
2487 	}
2488 
2489 	/* index only, 18 bit long, lkey = itid << 8 | key */
2490 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2491 	mr->hw_mr.pd = pd->pd_id;
2492 	mr->hw_mr.local_read = 1;
2493 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2494 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2495 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2496 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2497 	mr->hw_mr.dma_mr = true;
2498 
2499 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2500 	if (rc) {
2501 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2502 		goto err2;
2503 	}
2504 
2505 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2506 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2507 	    mr->hw_mr.remote_atomic)
2508 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2509 
2510 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2511 	return &mr->ibmr;
2512 
2513 err2:
2514 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2515 err1:
2516 	kfree(mr);
2517 	return ERR_PTR(rc);
2518 }
2519 
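/* Classic ring-buffer full test: the queue is full when advancing prod by
 * one (modulo max_wr) would collide with cons, i.e. one slot is always left
 * unused.
 */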
2520 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2521 {
2522 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2523 }
2524 
2525 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2526 {
2527 	int i, len = 0;
2528 
2529 	for (i = 0; i < num_sge; i++)
2530 		len += sg_list[i].length;
2531 
2532 	return len;
2533 }
2534 
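/* Byte-swap every 64-bit word of an SQ WQE element; the inline payload is
 * copied in CPU byte order, while the FW apparently expects it big-endian.
 */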
2535 static void swap_wqe_data64(u64 *p)
2536 {
2537 	int i;
2538 
2539 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2540 		*p = cpu_to_be64(cpu_to_le64(*p));
2541 }
2542 
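/* Copy the WR's scatter/gather payload directly into SQ WQE elements
 * (inline send). Returns the total data size, bumps *wqe_size for every
 * chain element produced and sets the inline flag bit in *bits; oversized
 * data sets *bad_wr and returns 0.
 */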
2543 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2544 				       struct qedr_qp *qp, u8 *wqe_size,
2545 				       struct ib_send_wr *wr,
2546 				       struct ib_send_wr **bad_wr, u8 *bits,
2547 				       u8 bit)
2548 {
2549 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2550 	char *seg_prt, *wqe;
2551 	int i, seg_siz;
2552 
2553 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2554 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2555 		*bad_wr = wr;
2556 		return 0;
2557 	}
2558 
2559 	if (!data_size)
2560 		return data_size;
2561 
2562 	*bits |= bit;
2563 
2564 	seg_prt = NULL;
2565 	wqe = NULL;
2566 	seg_siz = 0;
2567 
2568 	/* Copy data inline */
2569 	for (i = 0; i < wr->num_sge; i++) {
2570 		u32 len = wr->sg_list[i].length;
2571 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2572 
2573 		while (len > 0) {
2574 			u32 cur;
2575 
2576 			/* New segment required */
2577 			if (!seg_siz) {
2578 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2579 				seg_prt = wqe;
2580 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2581 				(*wqe_size)++;
2582 			}
2583 
2584 			/* Calculate currently allowed length */
2585 			cur = min_t(u32, len, seg_siz);
2586 			memcpy(seg_prt, src, cur);
2587 
2588 			/* Update segment variables */
2589 			seg_prt += cur;
2590 			seg_siz -= cur;
2591 
2592 			/* Update sge variables */
2593 			src += cur;
2594 			len -= cur;
2595 
2596 			/* Swap fully-completed segments */
2597 			if (!seg_siz)
2598 				swap_wqe_data64((u64 *)wqe);
2599 		}
2600 	}
2601 
2602 	/* Swap the last, partially filled segment */
2603 	if (seg_siz)
2604 		swap_wqe_data64((u64 *)wqe);
2605 
2606 	return data_size;
2607 }
2608 
2609 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
2610 	do {							\
2611 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2612 		(sge)->length = cpu_to_le32(vlength);		\
2613 		(sge)->flags = cpu_to_le32(vflags);		\
2614 	} while (0)
2615 
2616 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
2617 	do {							\
2618 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
2619 		(hdr)->num_sges = num_sge;			\
2620 	} while (0)
2621 
2622 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
2623 	do {							\
2624 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2625 		(sge)->length = cpu_to_le32(vlength);		\
2626 		(sge)->l_key = cpu_to_le32(vlkey);		\
2627 	} while (0)
2628 
2629 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2630 				struct ib_send_wr *wr)
2631 {
2632 	u32 data_size = 0;
2633 	int i;
2634 
2635 	for (i = 0; i < wr->num_sge; i++) {
2636 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2637 
2638 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2639 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2640 		sge->length = cpu_to_le32(wr->sg_list[i].length);
2641 		data_size += wr->sg_list[i].length;
2642 	}
2643 
2644 	if (wqe_size)
2645 		*wqe_size += wr->num_sge;
2646 
2647 	return data_size;
2648 }
2649 
2650 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2651 				     struct qedr_qp *qp,
2652 				     struct rdma_sq_rdma_wqe_1st *rwqe,
2653 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2654 				     struct ib_send_wr *wr,
2655 				     struct ib_send_wr **bad_wr)
2656 {
2657 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2658 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2659 
2660 	if (wr->send_flags & IB_SEND_INLINE &&
2661 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2662 	     wr->opcode == IB_WR_RDMA_WRITE)) {
2663 		u8 flags = 0;
2664 
2665 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2666 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2667 						   bad_wr, &rwqe->flags, flags);
2668 	}
2669 
2670 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2671 }
2672 
2673 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2674 				     struct qedr_qp *qp,
2675 				     struct rdma_sq_send_wqe_1st *swqe,
2676 				     struct rdma_sq_send_wqe_2st *swqe2,
2677 				     struct ib_send_wr *wr,
2678 				     struct ib_send_wr **bad_wr)
2679 {
2680 	memset(swqe2, 0, sizeof(*swqe2));
2681 	if (wr->send_flags & IB_SEND_INLINE) {
2682 		u8 flags = 0;
2683 
2684 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2685 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2686 						   bad_wr, &swqe->flags, flags);
2687 	}
2688 
2689 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2690 }
2691 
2692 static int qedr_prepare_reg(struct qedr_qp *qp,
2693 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2694 			    struct ib_reg_wr *wr)
2695 {
2696 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2697 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2698 
2699 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2700 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2701 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2702 	fwqe1->l_key = wr->key;
2703 
2704 	fwqe2->access_ctrl = 0;
2705 
2706 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2707 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2708 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2709 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2710 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2711 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2712 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2713 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2714 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2715 	fwqe2->fmr_ctrl = 0;
2716 
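	/* The FW seems to encode the page size as a log2 relative to 4 KB,
	 * hence the "ilog2(page_size) - 12" below.
	 */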
2717 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2718 		   ilog2(mr->ibmr.page_size) - 12);
2719 
2720 	fwqe2->length_hi = 0;
2721 	fwqe2->length_lo = mr->ibmr.length;
2722 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2723 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2724 
2725 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2726 
2727 	return 0;
2728 }
2729 
2730 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2731 {
2732 	switch (opcode) {
2733 	case IB_WR_RDMA_WRITE:
2734 	case IB_WR_RDMA_WRITE_WITH_IMM:
2735 		return IB_WC_RDMA_WRITE;
2736 	case IB_WR_SEND_WITH_IMM:
2737 	case IB_WR_SEND:
2738 	case IB_WR_SEND_WITH_INV:
2739 		return IB_WC_SEND;
2740 	case IB_WR_RDMA_READ:
2741 		return IB_WC_RDMA_READ;
2742 	case IB_WR_ATOMIC_CMP_AND_SWP:
2743 		return IB_WC_COMP_SWAP;
2744 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2745 		return IB_WC_FETCH_ADD;
2746 	case IB_WR_REG_MR:
2747 		return IB_WC_REG_MR;
2748 	case IB_WR_LOCAL_INV:
2749 		return IB_WC_LOCAL_INV;
2750 	default:
2751 		return IB_WC_SEND;
2752 	}
2753 }
2754 
2755 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2756 {
2757 	int wq_is_full, err_wr, pbl_is_full;
2758 	struct qedr_dev *dev = qp->dev;
2759 
2760 	/* prevent SQ overflow and/or processing of a bad WR */
2761 	err_wr = wr->num_sge > qp->sq.max_sges;
2762 	wq_is_full = qedr_wq_is_full(&qp->sq);
2763 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2764 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2765 	if (wq_is_full || err_wr || pbl_is_full) {
2766 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2767 			DP_ERR(dev,
2768 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2769 			       qp);
2770 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2771 		}
2772 
2773 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2774 			DP_ERR(dev,
2775 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2776 			       qp);
2777 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2778 		}
2779 
2780 		if (pbl_is_full &&
2781 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2782 			DP_ERR(dev,
2783 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2784 			       qp);
2785 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2786 		}
2787 		return false;
2788 	}
2789 	return true;
2790 }
2791 
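/* Build and post a single SQ WQE for the given WR. The first chain element
 * holds the common/1st WQE header; opcode-specific 2nd (and 3rd for atomics)
 * elements are produced afterwards. On error the producer and prev_wqe_size
 * are rolled back and *bad_wr is set.
 */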
2792 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2793 		     struct ib_send_wr **bad_wr)
2794 {
2795 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2796 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2797 	struct rdma_sq_atomic_wqe_1st *awqe1;
2798 	struct rdma_sq_atomic_wqe_2nd *awqe2;
2799 	struct rdma_sq_atomic_wqe_3rd *awqe3;
2800 	struct rdma_sq_send_wqe_2st *swqe2;
2801 	struct rdma_sq_local_inv_wqe *iwqe;
2802 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2803 	struct rdma_sq_send_wqe_1st *swqe;
2804 	struct rdma_sq_rdma_wqe_1st *rwqe;
2805 	struct rdma_sq_fmr_wqe_1st *fwqe1;
2806 	struct rdma_sq_common_wqe *wqe;
2807 	u32 length;
2808 	int rc = 0;
2809 	bool comp;
2810 
2811 	if (!qedr_can_post_send(qp, wr)) {
2812 		*bad_wr = wr;
2813 		return -ENOMEM;
2814 	}
2815 
2816 	wqe = qed_chain_produce(&qp->sq.pbl);
2817 	qp->wqe_wr_id[qp->sq.prod].signaled =
2818 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2819 
2820 	wqe->flags = 0;
2821 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2822 		   !!(wr->send_flags & IB_SEND_SOLICITED));
2823 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2824 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2825 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2826 		   !!(wr->send_flags & IB_SEND_FENCE));
2827 	wqe->prev_wqe_size = qp->prev_wqe_size;
2828 
2829 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2830 
2831 	switch (wr->opcode) {
2832 	case IB_WR_SEND_WITH_IMM:
2833 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
2834 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2835 		swqe->wqe_size = 2;
2836 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2837 
2838 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
2839 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2840 						   wr, bad_wr);
2841 		swqe->length = cpu_to_le32(length);
2842 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2843 		qp->prev_wqe_size = swqe->wqe_size;
2844 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2845 		break;
2846 	case IB_WR_SEND:
2847 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
2848 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2849 
2850 		swqe->wqe_size = 2;
2851 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2852 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2853 						   wr, bad_wr);
2854 		swqe->length = cpu_to_le32(length);
2855 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2856 		qp->prev_wqe_size = swqe->wqe_size;
2857 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2858 		break;
2859 	case IB_WR_SEND_WITH_INV:
2860 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
2861 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2862 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2863 		swqe->wqe_size = 2;
2864 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
2865 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2866 						   wr, bad_wr);
2867 		swqe->length = cpu_to_le32(length);
2868 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2869 		qp->prev_wqe_size = swqe->wqe_size;
2870 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2871 		break;
2872 
2873 	case IB_WR_RDMA_WRITE_WITH_IMM:
2874 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
2875 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2876 
2877 		rwqe->wqe_size = 2;
2878 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
2879 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2880 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2881 						   wr, bad_wr);
2882 		rwqe->length = cpu_to_le32(length);
2883 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2884 		qp->prev_wqe_size = rwqe->wqe_size;
2885 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2886 		break;
2887 	case IB_WR_RDMA_WRITE:
2888 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
2889 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2890 
2891 		rwqe->wqe_size = 2;
2892 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2893 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2894 						   wr, bad_wr);
2895 		rwqe->length = cpu_to_le32(length);
2896 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2897 		qp->prev_wqe_size = rwqe->wqe_size;
2898 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2899 		break;
2900 	case IB_WR_RDMA_READ_WITH_INV:
2901 		DP_ERR(dev,
2902 		       "RDMA READ WITH INVALIDATE not supported\n");
2903 		*bad_wr = wr;
2904 		rc = -EINVAL;
2905 		break;
2906 
2907 	case IB_WR_RDMA_READ:
2908 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
2909 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2910 
2911 		rwqe->wqe_size = 2;
2912 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2913 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2914 						   wr, bad_wr);
2915 		rwqe->length = cpu_to_le32(length);
2916 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2917 		qp->prev_wqe_size = rwqe->wqe_size;
2918 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2919 		break;
2920 
2921 	case IB_WR_ATOMIC_CMP_AND_SWP:
2922 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2923 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
2924 		awqe1->wqe_size = 4;
2925 
2926 		awqe2 = qed_chain_produce(&qp->sq.pbl);
2927 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
2928 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
2929 
2930 		awqe3 = qed_chain_produce(&qp->sq.pbl);
2931 
2932 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2933 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
2934 			DMA_REGPAIR_LE(awqe3->swap_data,
2935 				       atomic_wr(wr)->compare_add);
2936 		} else {
2937 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
2938 			DMA_REGPAIR_LE(awqe3->swap_data,
2939 				       atomic_wr(wr)->swap);
2940 			DMA_REGPAIR_LE(awqe3->cmp_data,
2941 				       atomic_wr(wr)->compare_add);
2942 		}
2943 
2944 		qedr_prepare_sq_sges(qp, NULL, wr);
2945 
2946 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
2947 		qp->prev_wqe_size = awqe1->wqe_size;
2948 		break;
2949 
2950 	case IB_WR_LOCAL_INV:
2951 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
2952 		iwqe->wqe_size = 1;
2953 
2954 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
2955 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
2956 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
2957 		qp->prev_wqe_size = iwqe->wqe_size;
2958 		break;
2959 	case IB_WR_REG_MR:
2960 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
2961 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
2962 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
2963 		fwqe1->wqe_size = 2;
2964 
2965 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
2966 		if (rc) {
2967 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
2968 			*bad_wr = wr;
2969 			break;
2970 		}
2971 
2972 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
2973 		qp->prev_wqe_size = fwqe1->wqe_size;
2974 		break;
2975 	default:
2976 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
2977 		rc = -EINVAL;
2978 		*bad_wr = wr;
2979 		break;
2980 	}
2981 
2982 	if (*bad_wr) {
2983 		u16 value;
2984 
2985 		/* Restore prod to its position before
2986 		 * this WR was processed
2987 		 */
2988 		value = le16_to_cpu(qp->sq.db_data.data.value);
2989 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
2990 
2991 		/* Restore prev_wqe_size */
2992 		qp->prev_wqe_size = wqe->prev_wqe_size;
2993 		rc = -EINVAL;
2994 		DP_ERR(dev, "POST SEND FAILED\n");
2995 	}
2996 
2997 	return rc;
2998 }
2999 
3000 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3001 		   struct ib_send_wr **bad_wr)
3002 {
3003 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3004 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3005 	unsigned long flags;
3006 	int rc = 0;
3007 
3008 	*bad_wr = NULL;
3009 
3010 	if (qp->qp_type == IB_QPT_GSI)
3011 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3012 
3013 	spin_lock_irqsave(&qp->q_lock, flags);
3014 
3015 	if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3016 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3017 	    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3018 		spin_unlock_irqrestore(&qp->q_lock, flags);
3019 		*bad_wr = wr;
3020 		DP_DEBUG(dev, QEDR_MSG_CQ,
3021 			 "QP in wrong state! QP icid=0x%x state %d\n",
3022 			 qp->icid, qp->state);
3023 		return -EINVAL;
3024 	}
3025 
3026 	while (wr) {
3027 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3028 		if (rc)
3029 			break;
3030 
3031 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3032 
3033 		qedr_inc_sw_prod(&qp->sq);
3034 
3035 		qp->sq.db_data.data.value++;
3036 
3037 		wr = wr->next;
3038 	}
3039 
3040 	/* Trigger the doorbell.
3041 	 * If the very first WR failed, the doorbell is rung in vain; however,
3042 	 * this is not harmful (as long as the producer value is unchanged).
3043 	 * For performance reasons we avoid checking for this redundant
3044 	 * doorbell.
3045 	 */
3046 	wmb();
3047 	writel(qp->sq.db_data.raw, qp->sq.db);
3048 
3049 	/* Make sure write sticks */
3050 	mmiowb();
3051 
3052 	spin_unlock_irqrestore(&qp->q_lock, flags);
3053 
3054 	return rc;
3055 }
3056 
3057 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3058 		   struct ib_recv_wr **bad_wr)
3059 {
3060 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3061 	struct qedr_dev *dev = qp->dev;
3062 	unsigned long flags;
3063 	int status = 0;
3064 
3065 	if (qp->qp_type == IB_QPT_GSI)
3066 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3067 
3068 	spin_lock_irqsave(&qp->q_lock, flags);
3069 
3070 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3071 		spin_unlock_irqrestore(&qp->q_lock, flags);
3072 		*bad_wr = wr;
3073 		return -EINVAL;
3074 	}
3075 
3076 	while (wr) {
3077 		int i;
3078 
3079 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3080 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3081 		    wr->num_sge > qp->rq.max_sges) {
3082 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3083 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3084 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3085 			       qp->rq.max_sges);
3086 			status = -ENOMEM;
3087 			*bad_wr = wr;
3088 			break;
3089 		}
3090 		for (i = 0; i < wr->num_sge; i++) {
3091 			u32 flags = 0;
3092 			struct rdma_rq_sge *rqe =
3093 			    qed_chain_produce(&qp->rq.pbl);
3094 
3095 			/* First one must include the number
3096 			 * of SGE in the list
3097 			 */
3098 			if (!i)
3099 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3100 					  wr->num_sge);
3101 
3102 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3103 				  wr->sg_list[i].lkey);
3104 
3105 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3106 				   wr->sg_list[i].length, flags);
3107 		}
3108 
3109 		/* Special case of no SGEs. The FW requires between 1 and 4
3110 		 * SGEs, so we post a single SGE with length zero. This is
3111 		 * needed because an RDMA write with immediate consumes an RQE.
3112 		 */
3113 		if (!wr->num_sge) {
3114 			u32 flags = 0;
3115 			struct rdma_rq_sge *rqe =
3116 			    qed_chain_produce(&qp->rq.pbl);
3117 
3118 			/* First one must include the number
3119 			 * of SGE in the list
3120 			 */
3121 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3122 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3123 
3124 			RQ_SGE_SET(rqe, 0, 0, flags);
3125 			i = 1;
3126 		}
3127 
3128 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3129 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3130 
3131 		qedr_inc_sw_prod(&qp->rq);
3132 
3133 		/* Flush all the writes before signalling doorbell */
3134 		wmb();
3135 
3136 		qp->rq.db_data.data.value++;
3137 
3138 		writel(qp->rq.db_data.raw, qp->rq.db);
3139 
3140 		/* Make sure write sticks */
3141 		mmiowb();
3142 
3143 		wr = wr->next;
3144 	}
3145 
3146 	spin_unlock_irqrestore(&qp->q_lock, flags);
3147 
3148 	return status;
3149 }
3150 
3151 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3152 {
3153 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3154 
3155 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3156 		cq->pbl_toggle;
3157 }
3158 
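/* The CQE carries the QP's host pointer split into a hi/lo pair in
 * qp_handle (presumably filled in when the QP was created); reassemble it
 * here.
 */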
3159 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3160 {
3161 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3162 	struct qedr_qp *qp;
3163 
3164 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3165 						   resp_cqe->qp_handle.lo,
3166 						   u64);
3167 	return qp;
3168 }
3169 
3170 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3171 {
3172 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3173 
3174 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3175 }
3176 
3177 /* Return latest CQE (needs processing) */
3178 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3179 {
3180 	return cq->latest_cqe;
3181 }
3182 
3183 /* For FMR we need to increment the 'completed' counter used by the FMR
3184  * algorithm that decides whether a PBL can be freed or not.
3185  * This must be done whether or not the work request was signaled. For that
3186  * purpose this function is called from the condition that checks if a WC
3187  * should be skipped, to make sure we don't miss an FMR operation that was
3188  * not signaled.
3189  */
3190 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3191 {
3192 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3193 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3194 }
3195 
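/* Walk the SQ from the software consumer up to hw_cons and generate a work
 * completion with the given status for every signaled WR (or for every WR
 * when 'force' is set, e.g. on flush). Unsignaled WRs are skipped but their
 * chain elements are still consumed. Returns the number of WCs written.
 */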
3196 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3197 		       struct qedr_cq *cq, int num_entries,
3198 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3199 		       int force)
3200 {
3201 	u16 cnt = 0;
3202 
3203 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3204 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3205 			qedr_chk_if_fmr(qp);
3206 			/* skip WC */
3207 			goto next_cqe;
3208 		}
3209 
3210 		/* fill WC */
3211 		wc->status = status;
3212 		wc->vendor_err = 0;
3213 		wc->wc_flags = 0;
3214 		wc->src_qp = qp->id;
3215 		wc->qp = &qp->ibqp;
3216 
3217 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3218 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3219 
3220 		switch (wc->opcode) {
3221 		case IB_WC_RDMA_WRITE:
3222 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3223 			break;
3224 		case IB_WC_COMP_SWAP:
3225 		case IB_WC_FETCH_ADD:
3226 			wc->byte_len = 8;
3227 			break;
3228 		case IB_WC_REG_MR:
3229 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3230 			break;
3231 		case IB_WC_RDMA_READ:
3232 		case IB_WC_SEND:
3233 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3234 			break;
3235 		default:
3236 			break;
3237 		}
3238 
3239 		num_entries--;
3240 		wc++;
3241 		cnt++;
3242 next_cqe:
3243 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3244 			qed_chain_consume(&qp->sq.pbl);
3245 		qedr_inc_sw_cons(&qp->sq);
3246 	}
3247 
3248 	return cnt;
3249 }
3250 
3251 static int qedr_poll_cq_req(struct qedr_dev *dev,
3252 			    struct qedr_qp *qp, struct qedr_cq *cq,
3253 			    int num_entries, struct ib_wc *wc,
3254 			    struct rdma_cqe_requester *req)
3255 {
3256 	int cnt = 0;
3257 
3258 	switch (req->status) {
3259 	case RDMA_CQE_REQ_STS_OK:
3260 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3261 				  IB_WC_SUCCESS, 0);
3262 		break;
3263 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3264 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3265 			DP_ERR(dev,
3266 			       "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3267 			       cq->icid, qp->icid);
3268 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3269 				  IB_WC_WR_FLUSH_ERR, 1);
3270 		break;
3271 	default:
3272 		/* Process all WQEs before the consumer */
3273 		qp->state = QED_ROCE_QP_STATE_ERR;
3274 		cnt = process_req(dev, qp, cq, num_entries, wc,
3275 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3276 		wc += cnt;
3277 		/* If we have an extra WC, fill it with the actual error info */
3278 		if (cnt < num_entries) {
3279 			enum ib_wc_status wc_status;
3280 
3281 			switch (req->status) {
3282 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3283 				DP_ERR(dev,
3284 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3285 				       cq->icid, qp->icid);
3286 				wc_status = IB_WC_BAD_RESP_ERR;
3287 				break;
3288 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3289 				DP_ERR(dev,
3290 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3291 				       cq->icid, qp->icid);
3292 				wc_status = IB_WC_LOC_LEN_ERR;
3293 				break;
3294 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3295 				DP_ERR(dev,
3296 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3297 				       cq->icid, qp->icid);
3298 				wc_status = IB_WC_LOC_QP_OP_ERR;
3299 				break;
3300 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3301 				DP_ERR(dev,
3302 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3303 				       cq->icid, qp->icid);
3304 				wc_status = IB_WC_LOC_PROT_ERR;
3305 				break;
3306 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3307 				DP_ERR(dev,
3308 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3309 				       cq->icid, qp->icid);
3310 				wc_status = IB_WC_MW_BIND_ERR;
3311 				break;
3312 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3313 				DP_ERR(dev,
3314 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3315 				       cq->icid, qp->icid);
3316 				wc_status = IB_WC_REM_INV_REQ_ERR;
3317 				break;
3318 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3319 				DP_ERR(dev,
3320 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3321 				       cq->icid, qp->icid);
3322 				wc_status = IB_WC_REM_ACCESS_ERR;
3323 				break;
3324 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3325 				DP_ERR(dev,
3326 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3327 				       cq->icid, qp->icid);
3328 				wc_status = IB_WC_REM_OP_ERR;
3329 				break;
3330 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3331 				DP_ERR(dev,
3332 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3333 				       cq->icid, qp->icid);
3334 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3335 				break;
3336 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3337 				DP_ERR(dev,
3338 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3339 				       cq->icid, qp->icid);
3340 				wc_status = IB_WC_RETRY_EXC_ERR;
3341 				break;
3342 			default:
3343 				DP_ERR(dev,
3344 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3345 				       cq->icid, qp->icid);
3346 				wc_status = IB_WC_GENERAL_ERR;
3347 			}
3348 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3349 					   wc_status, 1);
3350 		}
3351 	}
3352 
3353 	return cnt;
3354 }
3355 
3356 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3357 {
3358 	switch (status) {
3359 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3360 		return IB_WC_LOC_ACCESS_ERR;
3361 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3362 		return IB_WC_LOC_LEN_ERR;
3363 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3364 		return IB_WC_LOC_QP_OP_ERR;
3365 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3366 		return IB_WC_LOC_PROT_ERR;
3367 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3368 		return IB_WC_MW_BIND_ERR;
3369 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3370 		return IB_WC_REM_INV_RD_REQ_ERR;
3371 	case RDMA_CQE_RESP_STS_OK:
3372 		return IB_WC_SUCCESS;
3373 	default:
3374 		return IB_WC_GENERAL_ERR;
3375 	}
3376 }
3377 
3378 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3379 					  struct ib_wc *wc)
3380 {
3381 	wc->status = IB_WC_SUCCESS;
3382 	wc->byte_len = le32_to_cpu(resp->length);
3383 
3384 	if (resp->flags & QEDR_RESP_IMM) {
3385 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3386 		wc->wc_flags |= IB_WC_WITH_IMM;
3387 
3388 		if (resp->flags & QEDR_RESP_RDMA)
3389 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3390 
3391 		if (resp->flags & QEDR_RESP_INV)
3392 			return -EINVAL;
3393 
3394 	} else if (resp->flags & QEDR_RESP_INV) {
3395 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3396 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3397 
3398 		if (resp->flags & QEDR_RESP_RDMA)
3399 			return -EINVAL;
3400 
3401 	} else if (resp->flags & QEDR_RESP_RDMA) {
3402 		return -EINVAL;
3403 	}
3404 
3405 	return 0;
3406 }
3407 
3408 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3409 			       struct qedr_cq *cq, struct ib_wc *wc,
3410 			       struct rdma_cqe_responder *resp, u64 wr_id)
3411 {
3412 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3413 	wc->opcode = IB_WC_RECV;
3414 	wc->wc_flags = 0;
3415 
3416 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3417 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3418 			DP_ERR(dev,
3419 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3420 			       cq, cq->icid, resp->flags);
3421 
3422 	} else {
3423 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3424 		if (wc->status == IB_WC_GENERAL_ERR)
3425 			DP_ERR(dev,
3426 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3427 			       cq, cq->icid, resp->status);
3428 	}
3429 
3430 	/* Fill the rest of the WC */
3431 	wc->vendor_err = 0;
3432 	wc->src_qp = qp->id;
3433 	wc->qp = &qp->ibqp;
3434 	wc->wr_id = wr_id;
3435 }
3436 
3437 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3438 			    struct qedr_cq *cq, struct ib_wc *wc,
3439 			    struct rdma_cqe_responder *resp)
3440 {
3441 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3442 
3443 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3444 
3445 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3446 		qed_chain_consume(&qp->rq.pbl);
3447 	qedr_inc_sw_cons(&qp->rq);
3448 
3449 	return 1;
3450 }
3451 
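/* Flush the RQ: generate IB_WC_WR_FLUSH_ERR completions for all outstanding
 * receive WRs up to hw_cons and consume their chain elements.
 */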
3452 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3453 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3454 {
3455 	u16 cnt = 0;
3456 
3457 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3458 		/* fill WC */
3459 		wc->status = IB_WC_WR_FLUSH_ERR;
3460 		wc->vendor_err = 0;
3461 		wc->wc_flags = 0;
3462 		wc->src_qp = qp->id;
3463 		wc->byte_len = 0;
3464 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3465 		wc->qp = &qp->ibqp;
3466 		num_entries--;
3467 		wc++;
3468 		cnt++;
3469 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3470 			qed_chain_consume(&qp->rq.pbl);
3471 		qedr_inc_sw_cons(&qp->rq);
3472 	}
3473 
3474 	return cnt;
3475 }
3476 
3477 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3478 				 struct rdma_cqe_responder *resp, int *update)
3479 {
3480 	if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
3481 		consume_cqe(cq);
3482 		*update |= 1;
3483 	}
3484 }
3485 
3486 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3487 			     struct qedr_cq *cq, int num_entries,
3488 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
3489 			     int *update)
3490 {
3491 	int cnt;
3492 
3493 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3494 		cnt = process_resp_flush(qp, cq, num_entries, wc,
3495 					 resp->rq_cons);
3496 		try_consume_resp_cqe(cq, qp, resp, update);
3497 	} else {
3498 		cnt = process_resp_one(dev, qp, cq, wc, resp);
3499 		consume_cqe(cq);
3500 		*update |= 1;
3501 	}
3502 
3503 	return cnt;
3504 }
3505 
3506 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3507 				struct rdma_cqe_requester *req, int *update)
3508 {
3509 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3510 		consume_cqe(cq);
3511 		*update |= 1;
3512 	}
3513 }
3514 
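/* Poll up to num_entries completions from the CQ. Each valid CQE is
 * dispatched to the requester or responder handler according to its type;
 * GSI CQs are handled separately. If any CQE was consumed, the CQ doorbell
 * is updated with the new consumer index.
 */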
3515 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3516 {
3517 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3518 	struct qedr_cq *cq = get_qedr_cq(ibcq);
3519 	union rdma_cqe *cqe = cq->latest_cqe;
3520 	u32 old_cons, new_cons;
3521 	unsigned long flags;
3522 	int update = 0;
3523 	int done = 0;
3524 
3525 	if (cq->destroyed) {
3526 		DP_ERR(dev,
3527 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3528 		       cq, cq->icid);
3529 		return 0;
3530 	}
3531 
3532 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3533 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3534 
3535 	spin_lock_irqsave(&cq->cq_lock, flags);
3536 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3537 	while (num_entries && is_valid_cqe(cq, cqe)) {
3538 		struct qedr_qp *qp;
3539 		int cnt = 0;
3540 
3541 		/* prevent speculative reads of any field of CQE */
3542 		rmb();
3543 
3544 		qp = cqe_get_qp(cqe);
3545 		if (!qp) {
3546 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3547 			break;
3548 		}
3549 
3550 		wc->qp = &qp->ibqp;
3551 
3552 		switch (cqe_get_type(cqe)) {
3553 		case RDMA_CQE_TYPE_REQUESTER:
3554 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3555 					       &cqe->req);
3556 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
3557 			break;
3558 		case RDMA_CQE_TYPE_RESPONDER_RQ:
3559 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3560 						&cqe->resp, &update);
3561 			break;
3562 		case RDMA_CQE_TYPE_INVALID:
3563 		default:
3564 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
3565 			       cqe_get_type(cqe));
3566 		}
3567 		num_entries -= cnt;
3568 		wc += cnt;
3569 		done += cnt;
3570 
3571 		cqe = get_cqe(cq);
3572 	}
3573 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3574 
3575 	cq->cq_cons += new_cons - old_cons;
3576 
3577 	if (update)
3578 		/* The doorbell notifies about the latest VALID entry,
3579 		 * but the chain already points to the next INVALID one.
3580 		 */
3581 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3582 
3583 	spin_unlock_irqrestore(&cq->cq_lock, flags);
3584 	return done;
3585 }
3586 
3587 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3588 		     u8 port_num,
3589 		     const struct ib_wc *in_wc,
3590 		     const struct ib_grh *in_grh,
3591 		     const struct ib_mad_hdr *mad_hdr,
3592 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
3593 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
3594 {
3595 	struct qedr_dev *dev = get_qedr_dev(ibdev);
3596 
3597 	DP_DEBUG(dev, QEDR_MSG_GSI,
3598 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3599 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3600 		 mad_hdr->class_specific, mad_hdr->class_version,
3601 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3602 	return IB_MAD_RESULT_SUCCESS;
3603 }
3604 
3605 int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
3606 			struct ib_port_immutable *immutable)
3607 {
3608 	struct ib_port_attr attr;
3609 	int err;
3610 
3611 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
3612 				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
3613 
3614 	err = ib_query_port(ibdev, port_num, &attr);
3615 	if (err)
3616 		return err;
3617 
3618 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
3619 	immutable->gid_tbl_len = attr.gid_tbl_len;
3620 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
3621 
3622 	return 0;
3623 }
3624