xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 5a244f48)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_cm.h"
53 
54 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 
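/* Copy the response to user space, clamped to udata->outlen so that user
 * libraries built against an older (smaller) ABI response structure are
 * not overflowed.
 */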
56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
57 					size_t len)
58 {
59 	size_t min_len = min_t(size_t, len, udata->outlen);
60 
61 	return ib_copy_to_udata(udata, src, min_len);
62 }
63 
64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
65 {
66 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
67 		return -EINVAL;
68 
69 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
70 	return 0;
71 }
72 
73 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
74 		   union ib_gid *sgid)
75 {
76 	struct qedr_dev *dev = get_qedr_dev(ibdev);
77 	int rc = 0;
78 
79 	if (!rdma_cap_roce_gid_table(ibdev, port))
80 		return -ENODEV;
81 
82 	rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
83 	if (rc == -EAGAIN) {
84 		memcpy(sgid, &zgid, sizeof(*sgid));
85 		return 0;
86 	}
87 
88 	DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
89 		 sgid->global.interface_id, sgid->global.subnet_prefix);
90 
91 	return rc;
92 }
93 
94 int qedr_add_gid(struct ib_device *device, u8 port_num,
95 		 unsigned int index, const union ib_gid *gid,
96 		 const struct ib_gid_attr *attr, void **context)
97 {
98 	if (!rdma_cap_roce_gid_table(device, port_num))
99 		return -EINVAL;
100 
101 	if (port_num > QEDR_MAX_PORT)
102 		return -EINVAL;
103 
104 	if (!context)
105 		return -EINVAL;
106 
107 	return 0;
108 }
109 
110 int qedr_del_gid(struct ib_device *device, u8 port_num,
111 		 unsigned int index, void **context)
112 {
113 	if (!rdma_cap_roce_gid_table(device, port_num))
114 		return -EINVAL;
115 
116 	if (port_num > QEDR_MAX_PORT)
117 		return -EINVAL;
118 
119 	if (!context)
120 		return -EINVAL;
121 
122 	return 0;
123 }
124 
125 int qedr_query_device(struct ib_device *ibdev,
126 		      struct ib_device_attr *attr, struct ib_udata *udata)
127 {
128 	struct qedr_dev *dev = get_qedr_dev(ibdev);
129 	struct qedr_device_attr *qattr = &dev->attr;
130 
131 	if (!dev->rdma_ctx) {
132 		DP_ERR(dev,
133 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
134 		       dev->rdma_ctx);
135 		return -EINVAL;
136 	}
137 
138 	memset(attr, 0, sizeof(*attr));
139 
140 	attr->fw_ver = qattr->fw_ver;
141 	attr->sys_image_guid = qattr->sys_image_guid;
142 	attr->max_mr_size = qattr->max_mr_size;
143 	attr->page_size_cap = qattr->page_size_caps;
144 	attr->vendor_id = qattr->vendor_id;
145 	attr->vendor_part_id = qattr->vendor_part_id;
146 	attr->hw_ver = qattr->hw_ver;
147 	attr->max_qp = qattr->max_qp;
148 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
149 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
150 	    IB_DEVICE_RC_RNR_NAK_GEN |
151 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
152 
153 	attr->max_sge = qattr->max_sge;
154 	attr->max_sge_rd = qattr->max_sge;
155 	attr->max_cq = qattr->max_cq;
156 	attr->max_cqe = qattr->max_cqe;
157 	attr->max_mr = qattr->max_mr;
158 	attr->max_mw = qattr->max_mw;
159 	attr->max_pd = qattr->max_pd;
160 	attr->atomic_cap = dev->atomic_cap;
161 	attr->max_fmr = qattr->max_fmr;
162 	attr->max_map_per_fmr = 16;
163 	attr->max_qp_init_rd_atom =
164 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
165 	attr->max_qp_rd_atom =
166 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
167 		attr->max_qp_init_rd_atom);
168 
169 	attr->max_srq = qattr->max_srq;
170 	attr->max_srq_sge = qattr->max_srq_sge;
171 	attr->max_srq_wr = qattr->max_srq_wr;
172 
173 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
174 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
175 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
176 	attr->max_ah = qattr->max_ah;
177 
178 	return 0;
179 }
180 
181 #define QEDR_SPEED_SDR		(1)
182 #define QEDR_SPEED_DDR		(2)
183 #define QEDR_SPEED_QDR		(4)
184 #define QEDR_SPEED_FDR10	(8)
185 #define QEDR_SPEED_FDR		(16)
186 #define QEDR_SPEED_EDR		(32)
187 
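/* Translate the Ethernet link speed (in Mbps) reported by qed into the
 * closest InfiniBand speed/width pair; unknown speeds fall back to SDR x1.
 */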
188 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
189 					    u8 *ib_width)
190 {
191 	switch (speed) {
192 	case 1000:
193 		*ib_speed = QEDR_SPEED_SDR;
194 		*ib_width = IB_WIDTH_1X;
195 		break;
196 	case 10000:
197 		*ib_speed = QEDR_SPEED_QDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 20000:
202 		*ib_speed = QEDR_SPEED_DDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	case 25000:
207 		*ib_speed = QEDR_SPEED_EDR;
208 		*ib_width = IB_WIDTH_1X;
209 		break;
210 
211 	case 40000:
212 		*ib_speed = QEDR_SPEED_QDR;
213 		*ib_width = IB_WIDTH_4X;
214 		break;
215 
216 	case 50000:
217 		*ib_speed = QEDR_SPEED_QDR;
218 		*ib_width = IB_WIDTH_4X;
219 		break;
220 
221 	case 100000:
222 		*ib_speed = QEDR_SPEED_EDR;
223 		*ib_width = IB_WIDTH_4X;
224 		break;
225 
226 	default:
227 		/* Unsupported */
228 		*ib_speed = QEDR_SPEED_SDR;
229 		*ib_width = IB_WIDTH_1X;
230 	}
231 }
232 
233 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
234 {
235 	struct qedr_dev *dev;
236 	struct qed_rdma_port *rdma_port;
237 
238 	dev = get_qedr_dev(ibdev);
239 	if (port > 1) {
240 		DP_ERR(dev, "invalid_port=0x%x\n", port);
241 		return -EINVAL;
242 	}
243 
244 	if (!dev->rdma_ctx) {
245 		DP_ERR(dev, "rdma_ctx is NULL\n");
246 		return -EINVAL;
247 	}
248 
249 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
250 
251 	/* *attr is zeroed by the caller; avoid zeroing it again here */
252 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
253 		attr->state = IB_PORT_ACTIVE;
254 		attr->phys_state = 5;
255 	} else {
256 		attr->state = IB_PORT_DOWN;
257 		attr->phys_state = 3;
258 	}
259 	attr->max_mtu = IB_MTU_4096;
260 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
261 	attr->lid = 0;
262 	attr->lmc = 0;
263 	attr->sm_lid = 0;
264 	attr->sm_sl = 0;
265 	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
266 	attr->gid_tbl_len = QEDR_MAX_SGID;
267 	attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
268 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
269 	attr->qkey_viol_cntr = 0;
270 	get_link_speed_and_width(rdma_port->link_speed,
271 				 &attr->active_speed, &attr->active_width);
272 	attr->max_msg_sz = rdma_port->max_msg_size;
273 	attr->max_vl_num = 4;
274 
275 	return 0;
276 }
277 
278 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
279 		     struct ib_port_modify *props)
280 {
281 	struct qedr_dev *dev;
282 
283 	dev = get_qedr_dev(ibdev);
284 	if (port > 1) {
285 		DP_ERR(dev, "invalid_port=0x%x\n", port);
286 		return -EINVAL;
287 	}
288 
289 	return 0;
290 }
291 
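
/* Remember a doorbell region (physical address and page-rounded length) in
 * the per-ucontext list so that a later qedr_mmap() call can validate it.
 */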
292 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
293 			 unsigned long len)
294 {
295 	struct qedr_mm *mm;
296 
297 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
298 	if (!mm)
299 		return -ENOMEM;
300 
301 	mm->key.phy_addr = phy_addr;
302 	/* This function might be called with a length which is not a multiple
303 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
304 	 * forces this granularity by increasing the requested size if needed.
305 	 * When qedr_mmap is called, it will search the list with the updated
306 	 * length as a key. To prevent search failures, the length is rounded up
307 	 * in advance to PAGE_SIZE.
308 	 */
309 	mm->key.len = roundup(len, PAGE_SIZE);
310 	INIT_LIST_HEAD(&mm->entry);
311 
312 	mutex_lock(&uctx->mm_list_lock);
313 	list_add(&mm->entry, &uctx->mm_head);
314 	mutex_unlock(&uctx->mm_list_lock);
315 
316 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
317 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
318 		 (unsigned long long)mm->key.phy_addr,
319 		 (unsigned long)mm->key.len, uctx);
320 
321 	return 0;
322 }
323 
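/* Check whether (phy_addr, len) was previously registered via
 * qedr_add_mmap(); qedr_mmap() only maps offsets that are found here.
 */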
324 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
325 			     unsigned long len)
326 {
327 	bool found = false;
328 	struct qedr_mm *mm;
329 
330 	mutex_lock(&uctx->mm_list_lock);
331 	list_for_each_entry(mm, &uctx->mm_head, entry) {
332 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
333 			continue;
334 
335 		found = true;
336 		break;
337 	}
338 	mutex_unlock(&uctx->mm_list_lock);
339 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
340 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
341 		 phy_addr, len, uctx, found);
342 
343 	return found;
344 }
345 
346 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
347 					struct ib_udata *udata)
348 {
349 	int rc;
350 	struct qedr_ucontext *ctx;
351 	struct qedr_alloc_ucontext_resp uresp;
352 	struct qedr_dev *dev = get_qedr_dev(ibdev);
353 	struct qed_rdma_add_user_out_params oparams;
354 
355 	if (!udata)
356 		return ERR_PTR(-EFAULT);
357 
358 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
359 	if (!ctx)
360 		return ERR_PTR(-ENOMEM);
361 
362 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
363 	if (rc) {
364 		DP_ERR(dev,
365 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size or closing unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
366 		       rc);
367 		goto err;
368 	}
369 
370 	ctx->dpi = oparams.dpi;
371 	ctx->dpi_addr = oparams.dpi_addr;
372 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
373 	ctx->dpi_size = oparams.dpi_size;
374 	INIT_LIST_HEAD(&ctx->mm_head);
375 	mutex_init(&ctx->mm_list_lock);
376 
377 	memset(&uresp, 0, sizeof(uresp));
378 
379 	uresp.dpm_enabled = dev->user_dpm_enabled;
380 	uresp.wids_enabled = 1;
381 	uresp.wid_count = oparams.wid_count;
382 	uresp.db_pa = ctx->dpi_phys_addr;
383 	uresp.db_size = ctx->dpi_size;
384 	uresp.max_send_wr = dev->attr.max_sqe;
385 	uresp.max_recv_wr = dev->attr.max_rqe;
386 	uresp.max_srq_wr = dev->attr.max_srq_wr;
387 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
388 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
389 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
390 	uresp.max_cqes = QEDR_MAX_CQES;
391 
392 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
393 	if (rc)
394 		goto err;
395 
396 	ctx->dev = dev;
397 
398 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
399 	if (rc)
400 		goto err;
401 
402 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
403 		 &ctx->ibucontext);
404 	return &ctx->ibucontext;
405 
406 err:
407 	kfree(ctx);
408 	return ERR_PTR(rc);
409 }
410 
411 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
412 {
413 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
414 	struct qedr_mm *mm, *tmp;
415 	int status = 0;
416 
417 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
418 		 uctx);
419 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
420 
421 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
422 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
423 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
424 			 mm->key.phy_addr, mm->key.len, uctx);
425 		list_del(&mm->entry);
426 		kfree(mm);
427 	}
428 
429 	kfree(uctx);
430 	return status;
431 }
432 
433 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
434 {
435 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
436 	struct qedr_dev *dev = get_qedr_dev(context->device);
437 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
438 	u64 unmapped_db = dev->db_phys_addr;
439 	unsigned long len = (vma->vm_end - vma->vm_start);
440 	int rc = 0;
441 	bool found;
442 
443 	DP_DEBUG(dev, QEDR_MSG_INIT,
444 		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
445 		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
446 	if (vma->vm_start & (PAGE_SIZE - 1)) {
447 		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
448 		       vma->vm_start);
449 		return -EINVAL;
450 	}
451 
452 	found = qedr_search_mmap(ucontext, vm_page, len);
453 	if (!found) {
454 		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
455 		       vma->vm_pgoff);
456 		return -EINVAL;
457 	}
458 
459 	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
460 
461 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
462 						     dev->db_size))) {
463 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
464 		if (vma->vm_flags & VM_READ) {
465 			DP_ERR(dev, "Trying to map doorbell bar for read\n");
466 			return -EPERM;
467 		}
468 
469 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
470 
471 		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
472 					PAGE_SIZE, vma->vm_page_prot);
473 	} else {
474 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
475 		rc = remap_pfn_range(vma, vma->vm_start,
476 				     vma->vm_pgoff, len, vma->vm_page_prot);
477 	}
478 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
479 	return rc;
480 }
481 
482 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
483 			    struct ib_ucontext *context, struct ib_udata *udata)
484 {
485 	struct qedr_dev *dev = get_qedr_dev(ibdev);
486 	struct qedr_pd *pd;
487 	u16 pd_id;
488 	int rc;
489 
490 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
491 		 (udata && context) ? "User Lib" : "Kernel");
492 
493 	if (!dev->rdma_ctx) {
494 		DP_ERR(dev, "invalid RDMA context\n");
495 		return ERR_PTR(-EINVAL);
496 	}
497 
498 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
499 	if (!pd)
500 		return ERR_PTR(-ENOMEM);
501 
502 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
503 	if (rc)
504 		goto err;
505 
506 	pd->pd_id = pd_id;
507 
508 	if (udata && context) {
509 		struct qedr_alloc_pd_uresp uresp;
510 
511 		uresp.pd_id = pd_id;
512 
513 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
514 		if (rc) {
515 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
516 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
517 			goto err;
518 		}
519 
520 		pd->uctx = get_qedr_ucontext(context);
521 		pd->uctx->pd = pd;
522 	}
523 
524 	return &pd->ibpd;
525 
526 err:
527 	kfree(pd);
528 	return ERR_PTR(rc);
529 }
530 
531 int qedr_dealloc_pd(struct ib_pd *ibpd)
532 {
533 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
534 	struct qedr_pd *pd = get_qedr_pd(ibpd);
535 
536 	if (!pd) {
537 		pr_err("Invalid PD received in dealloc_pd\n");
538 		return -EINVAL;
539 	}
540 
541 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
542 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
543 
544 	kfree(pd);
545 
546 	return 0;
547 }
548 
549 static void qedr_free_pbl(struct qedr_dev *dev,
550 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
551 {
552 	struct pci_dev *pdev = dev->pdev;
553 	int i;
554 
555 	for (i = 0; i < pbl_info->num_pbls; i++) {
556 		if (!pbl[i].va)
557 			continue;
558 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
559 				  pbl[i].va, pbl[i].pa);
560 	}
561 
562 	kfree(pbl);
563 }
564 
565 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
566 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
567 
568 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
569 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
570 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
571 
572 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
573 					   struct qedr_pbl_info *pbl_info,
574 					   gfp_t flags)
575 {
576 	struct pci_dev *pdev = dev->pdev;
577 	struct qedr_pbl *pbl_table;
578 	dma_addr_t *pbl_main_tbl;
579 	dma_addr_t pa;
580 	void *va;
581 	int i;
582 
583 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
584 	if (!pbl_table)
585 		return ERR_PTR(-ENOMEM);
586 
587 	for (i = 0; i < pbl_info->num_pbls; i++) {
588 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
589 					&pa, flags);
590 		if (!va)
591 			goto err;
592 
593 		memset(va, 0, pbl_info->pbl_size);
594 		pbl_table[i].va = va;
595 		pbl_table[i].pa = pa;
596 	}
597 
598 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
599 	 * the first one with physical pointers to all of the rest
600 	 */
601 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
602 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
603 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
604 
605 	return pbl_table;
606 
607 err:
608 	for (i--; i >= 0; i--)
609 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
610 				  pbl_table[i].va, pbl_table[i].pa);
611 
612 	qedr_free_pbl(dev, pbl_info, pbl_table);
613 
614 	return ERR_PTR(-ENOMEM);
615 }
616 
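/* Choose between a single-level and a two-level PBL based on the number of
 * PBEs. For the two-level case, grow the PBL page size until a single page
 * of pointers can cover all of the leaf pages, and add one extra page for
 * the layer-0 table.
 */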
617 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
618 				struct qedr_pbl_info *pbl_info,
619 				u32 num_pbes, int two_layer_capable)
620 {
621 	u32 pbl_capacity;
622 	u32 pbl_size;
623 	u32 num_pbls;
624 
625 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
626 		if (num_pbes > MAX_PBES_TWO_LAYER) {
627 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
628 			       num_pbes);
629 			return -EINVAL;
630 		}
631 
632 		/* calculate required pbl page size */
633 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
634 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
635 			       NUM_PBES_ON_PAGE(pbl_size);
636 
637 		while (pbl_capacity < num_pbes) {
638 			pbl_size *= 2;
639 			pbl_capacity = pbl_size / sizeof(u64);
640 			pbl_capacity = pbl_capacity * pbl_capacity;
641 		}
642 
643 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
644 		num_pbls++;	/* One for layer 0, which points to the other PBLs */
645 		pbl_info->two_layered = true;
646 	} else {
647 		/* One layered PBL */
648 		num_pbls = 1;
649 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
650 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
651 		pbl_info->two_layered = false;
652 	}
653 
654 	pbl_info->num_pbls = num_pbls;
655 	pbl_info->pbl_size = pbl_size;
656 	pbl_info->num_pbes = num_pbes;
657 
658 	DP_DEBUG(dev, QEDR_MSG_MR,
659 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
660 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
661 
662 	return 0;
663 }
664 
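/* Walk the umem scatterlist and write one little-endian PBE per firmware
 * page (of size 1 << pg_shift), advancing to the next PBL page whenever
 * the current one is full.
 */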
665 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
666 			       struct qedr_pbl *pbl,
667 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
668 {
669 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
670 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
671 	struct qedr_pbl *pbl_tbl;
672 	struct scatterlist *sg;
673 	struct regpair *pbe;
674 	u64 pg_addr;
675 	int entry;
676 
677 	if (!pbl_info->num_pbes)
678 		return;
679 
680 	/* If we have a two layered pbl, the first pbl points to the rest
681 	 * of the pbls and the first entry lays on the second pbl in the table
682 	 */
683 	if (pbl_info->two_layered)
684 		pbl_tbl = &pbl[1];
685 	else
686 		pbl_tbl = pbl;
687 
688 	pbe = (struct regpair *)pbl_tbl->va;
689 	if (!pbe) {
690 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
691 		return;
692 	}
693 
694 	pbe_cnt = 0;
695 
696 	shift = umem->page_shift;
697 
698 	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
699 
700 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
701 		pages = sg_dma_len(sg) >> shift;
702 		pg_addr = sg_dma_address(sg);
703 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
704 			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
705 				pbe->lo = cpu_to_le32(pg_addr);
706 				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
707 
708 				pg_addr += BIT(pg_shift);
709 				pbe_cnt++;
710 				total_num_pbes++;
711 				pbe++;
712 
713 				if (total_num_pbes == pbl_info->num_pbes)
714 					return;
715 
716 				/* If the given pbl is full storing the pbes,
717 				 * move to next pbl.
718 				 */
719 				if (pbe_cnt ==
720 				    (pbl_info->pbl_size / sizeof(u64))) {
721 					pbl_tbl++;
722 					pbe = (struct regpair *)pbl_tbl->va;
723 					pbe_cnt = 0;
724 				}
725 
726 				fw_pg_cnt++;
727 			}
728 		}
729 	}
730 }
731 
732 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
733 			      struct qedr_cq *cq, struct ib_udata *udata)
734 {
735 	struct qedr_create_cq_uresp uresp;
736 	int rc;
737 
738 	memset(&uresp, 0, sizeof(uresp));
739 
740 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
741 	uresp.icid = cq->icid;
742 
743 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
744 	if (rc)
745 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
746 
747 	return rc;
748 }
749 
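/* Advance the CQ chain consumer; flip the expected toggle bit when the last
 * CQE of the ring is passed.
 */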
750 static void consume_cqe(struct qedr_cq *cq)
751 {
752 	if (cq->latest_cqe == cq->toggle_cqe)
753 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
754 
755 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
756 }
757 
758 static inline int qedr_align_cq_entries(int entries)
759 {
760 	u64 size, aligned_size;
761 
762 	/* We allocate an extra entry that we don't report to the FW. */
763 	size = (entries + 1) * QEDR_CQE_SIZE;
764 	aligned_size = ALIGN(size, PAGE_SIZE);
765 
766 	return aligned_size / QEDR_CQE_SIZE;
767 }
768 
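/* Pin the user-space queue buffer with ib_umem_get() and build the PBL that
 * describes it to the firmware in FW_PAGE_SHIFT-sized pages.
 */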
769 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
770 				       struct qedr_dev *dev,
771 				       struct qedr_userq *q,
772 				       u64 buf_addr, size_t buf_len,
773 				       int access, int dmasync)
774 {
775 	u32 fw_pages;
776 	int rc;
777 
778 	q->buf_addr = buf_addr;
779 	q->buf_len = buf_len;
780 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
781 	if (IS_ERR(q->umem)) {
782 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
783 		       PTR_ERR(q->umem));
784 		return PTR_ERR(q->umem);
785 	}
786 
787 	fw_pages = ib_umem_page_count(q->umem) <<
788 	    (q->umem->page_shift - FW_PAGE_SHIFT);
789 
790 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
791 	if (rc)
792 		goto err0;
793 
794 	q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
795 	if (IS_ERR(q->pbl_tbl)) {
796 		rc = PTR_ERR(q->pbl_tbl);
797 		goto err0;
798 	}
799 
800 	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
801 			   FW_PAGE_SHIFT);
802 
803 	return 0;
804 
805 err0:
806 	ib_umem_release(q->umem);
807 
808 	return rc;
809 }
810 
811 static inline void qedr_init_cq_params(struct qedr_cq *cq,
812 				       struct qedr_ucontext *ctx,
813 				       struct qedr_dev *dev, int vector,
814 				       int chain_entries, int page_cnt,
815 				       u64 pbl_ptr,
816 				       struct qed_rdma_create_cq_in_params
817 				       *params)
818 {
819 	memset(params, 0, sizeof(*params));
820 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
821 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
822 	params->cnq_id = vector;
823 	params->cq_size = chain_entries - 1;
824 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
825 	params->pbl_num_pages = page_cnt;
826 	params->pbl_ptr = pbl_ptr;
827 	params->pbl_two_level = 0;
828 }
829 
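/* Ring the CQ doorbell with the given consumer index and aggregation flags.
 * The wmb() orders the CQE processing done so far before the doorbell write.
 */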
830 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
831 {
832 	/* Flush data before signalling doorbell */
833 	wmb();
834 	cq->db.data.agg_flags = flags;
835 	cq->db.data.value = cpu_to_le32(cons);
836 	writeq(cq->db.raw, cq->db_addr);
837 
838 	/* Make sure write would stick */
839 	mmiowb();
840 }
841 
842 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
843 {
844 	struct qedr_cq *cq = get_qedr_cq(ibcq);
845 	unsigned long sflags;
846 	struct qedr_dev *dev;
847 
848 	dev = get_qedr_dev(ibcq->device);
849 
850 	if (cq->destroyed) {
851 		DP_ERR(dev,
852 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
853 		       cq, cq->icid);
854 		return -EINVAL;
855 	}
856 
857 
858 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
859 		return 0;
860 
861 	spin_lock_irqsave(&cq->cq_lock, sflags);
862 
863 	cq->arm_flags = 0;
864 
865 	if (flags & IB_CQ_SOLICITED)
866 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
867 
868 	if (flags & IB_CQ_NEXT_COMP)
869 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
870 
871 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
872 
873 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
874 
875 	return 0;
876 }
877 
878 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
879 			     const struct ib_cq_init_attr *attr,
880 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
881 {
882 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
883 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
884 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
885 	struct qedr_dev *dev = get_qedr_dev(ibdev);
886 	struct qed_rdma_create_cq_in_params params;
887 	struct qedr_create_cq_ureq ureq;
888 	int vector = attr->comp_vector;
889 	int entries = attr->cqe;
890 	struct qedr_cq *cq;
891 	int chain_entries;
892 	int page_cnt;
893 	u64 pbl_ptr;
894 	u16 icid;
895 	int rc;
896 
897 	DP_DEBUG(dev, QEDR_MSG_INIT,
898 		 "create_cq: called from %s. entries=%d, vector=%d\n",
899 		 udata ? "User Lib" : "Kernel", entries, vector);
900 
901 	if (entries > QEDR_MAX_CQES) {
902 		DP_ERR(dev,
903 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
904 		       entries, QEDR_MAX_CQES);
905 		return ERR_PTR(-EINVAL);
906 	}
907 
908 	chain_entries = qedr_align_cq_entries(entries);
909 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
910 
911 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
912 	if (!cq)
913 		return ERR_PTR(-ENOMEM);
914 
915 	if (udata) {
916 		memset(&ureq, 0, sizeof(ureq));
917 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
918 			DP_ERR(dev,
919 			       "create cq: problem copying data from user space\n");
920 			goto err0;
921 		}
922 
923 		if (!ureq.len) {
924 			DP_ERR(dev,
925 			       "create cq: cannot create a cq with 0 entries\n");
926 			goto err0;
927 		}
928 
929 		cq->cq_type = QEDR_CQ_TYPE_USER;
930 
931 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
932 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
933 		if (rc)
934 			goto err0;
935 
936 		pbl_ptr = cq->q.pbl_tbl->pa;
937 		page_cnt = cq->q.pbl_info.num_pbes;
938 
939 		cq->ibcq.cqe = chain_entries;
940 	} else {
941 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
942 
943 		rc = dev->ops->common->chain_alloc(dev->cdev,
944 						   QED_CHAIN_USE_TO_CONSUME,
945 						   QED_CHAIN_MODE_PBL,
946 						   QED_CHAIN_CNT_TYPE_U32,
947 						   chain_entries,
948 						   sizeof(union rdma_cqe),
949 						   &cq->pbl, NULL);
950 		if (rc)
951 			goto err1;
952 
953 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
954 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
955 		cq->ibcq.cqe = cq->pbl.capacity;
956 	}
957 
958 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
959 			    pbl_ptr, &params);
960 
961 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
962 	if (rc)
963 		goto err2;
964 
965 	cq->icid = icid;
966 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
967 	spin_lock_init(&cq->cq_lock);
968 
969 	if (ib_ctx) {
970 		rc = qedr_copy_cq_uresp(dev, cq, udata);
971 		if (rc)
972 			goto err3;
973 	} else {
974 		/* Generate doorbell address. */
975 		cq->db_addr = dev->db_addr +
976 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
977 		cq->db.data.icid = cq->icid;
978 		cq->db.data.params = DB_AGG_CMD_SET <<
979 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
980 
981 		/* point to the very last element, passing it we will toggle */
982 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
983 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
984 		cq->latest_cqe = NULL;
985 		consume_cqe(cq);
986 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
987 	}
988 
989 	DP_DEBUG(dev, QEDR_MSG_CQ,
990 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
991 		 cq->icid, cq, params.cq_size);
992 
993 	return &cq->ibcq;
994 
995 err3:
996 	destroy_iparams.icid = cq->icid;
997 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
998 				  &destroy_oparams);
999 err2:
1000 	if (udata)
1001 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1002 	else
1003 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1004 err1:
1005 	if (udata)
1006 		ib_umem_release(cq->q.umem);
1007 err0:
1008 	kfree(cq);
1009 	return ERR_PTR(-EINVAL);
1010 }
1011 
1012 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1013 {
1014 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1015 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1016 
1017 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1018 
1019 	return 0;
1020 }
1021 
1022 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1023 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1024 
1025 int qedr_destroy_cq(struct ib_cq *ibcq)
1026 {
1027 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1028 	struct qed_rdma_destroy_cq_out_params oparams;
1029 	struct qed_rdma_destroy_cq_in_params iparams;
1030 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1031 	int iter;
1032 	int rc;
1033 
1034 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1035 
1036 	cq->destroyed = 1;
1037 
1038 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
1039 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1040 		goto done;
1041 
1042 	iparams.icid = cq->icid;
1043 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1044 	if (rc)
1045 		return rc;
1046 
1047 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1048 
1049 	if (ibcq->uobject && ibcq->uobject->context) {
1050 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1051 		ib_umem_release(cq->q.umem);
1052 	}
1053 
1054 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1055 	 * wait until all CNQ interrupts, if any, are received. This will always
1056 	 * happen and will always happen very fast. If not, then a serious error
1057 	 * has occurred. That is why we can use a long delay.
1058 	 * We spin for a short time so we don't lose time on context switching
1059 	 * in case all the completions are handled in that span. Otherwise
1060 	 * we sleep for a while and check again. Since the CNQ may be
1061 	 * associated with (only) the current CPU we use msleep to allow the
1062 	 * current CPU to be freed.
1063 	 * The CNQ notification is increased in qedr_irq_handler().
1064 	 */
1065 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1066 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1067 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1068 		iter--;
1069 	}
1070 
1071 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1072 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1073 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1074 		iter--;
1075 	}
1076 
1077 	if (oparams.num_cq_notif != cq->cnq_notif)
1078 		goto err;
1079 
1080 	/* Note that we don't need to have explicit code to wait for the
1081 	 * completion of the event handler because it is invoked from the EQ.
1082 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1083 	 * be certain that there's no event handler in process.
1084 	 */
1085 done:
1086 	cq->sig = ~cq->sig;
1087 
1088 	kfree(cq);
1089 
1090 	return 0;
1091 
1092 err:
1093 	DP_ERR(dev,
1094 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1095 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1096 
1097 	return -EINVAL;
1098 }
1099 
1100 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1101 					  struct ib_qp_attr *attr,
1102 					  int attr_mask,
1103 					  struct qed_rdma_modify_qp_in_params
1104 					  *qp_params)
1105 {
1106 	enum rdma_network_type nw_type;
1107 	struct ib_gid_attr gid_attr;
1108 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1109 	union ib_gid gid;
1110 	u32 ipv4_addr;
1111 	int rc = 0;
1112 	int i;
1113 
1114 	rc = ib_get_cached_gid(ibqp->device,
1115 			       rdma_ah_get_port_num(&attr->ah_attr),
1116 			       grh->sgid_index, &gid, &gid_attr);
1117 	if (rc)
1118 		return rc;
1119 
1120 	if (!memcmp(&gid, &zgid, sizeof(gid)))
1121 		return -ENOENT;
1122 
1123 	if (gid_attr.ndev) {
1124 		qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1125 
1126 		dev_put(gid_attr.ndev);
1127 		nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1128 		switch (nw_type) {
1129 		case RDMA_NETWORK_IPV6:
1130 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1131 			       sizeof(qp_params->sgid));
1132 			memcpy(&qp_params->dgid.bytes[0],
1133 			       &grh->dgid,
1134 			       sizeof(qp_params->dgid));
1135 			qp_params->roce_mode = ROCE_V2_IPV6;
1136 			SET_FIELD(qp_params->modify_flags,
1137 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1138 			break;
1139 		case RDMA_NETWORK_IB:
1140 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1141 			       sizeof(qp_params->sgid));
1142 			memcpy(&qp_params->dgid.bytes[0],
1143 			       &grh->dgid,
1144 			       sizeof(qp_params->dgid));
1145 			qp_params->roce_mode = ROCE_V1;
1146 			break;
1147 		case RDMA_NETWORK_IPV4:
1148 			memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1149 			memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1150 			ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1151 			qp_params->sgid.ipv4_addr = ipv4_addr;
1152 			ipv4_addr =
1153 			    qedr_get_ipv4_from_gid(grh->dgid.raw);
1154 			qp_params->dgid.ipv4_addr = ipv4_addr;
1155 			SET_FIELD(qp_params->modify_flags,
1156 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1157 			qp_params->roce_mode = ROCE_V2_IPV4;
1158 			break;
1159 		}
1160 	}
1161 
1162 	for (i = 0; i < 4; i++) {
1163 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1164 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1165 	}
1166 
1167 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1168 		qp_params->vlan_id = 0;
1169 
1170 	return 0;
1171 }
1172 
1173 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1174 			       struct ib_qp_init_attr *attrs)
1175 {
1176 	struct qedr_device_attr *qattr = &dev->attr;
1177 
1178 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1179 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1180 		DP_DEBUG(dev, QEDR_MSG_QP,
1181 			 "create qp: unsupported qp type=0x%x requested\n",
1182 			 attrs->qp_type);
1183 		return -EINVAL;
1184 	}
1185 
1186 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1187 		DP_ERR(dev,
1188 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1189 		       attrs->cap.max_send_wr, qattr->max_sqe);
1190 		return -EINVAL;
1191 	}
1192 
1193 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1194 		DP_ERR(dev,
1195 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1196 		       attrs->cap.max_inline_data, qattr->max_inline);
1197 		return -EINVAL;
1198 	}
1199 
1200 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1201 		DP_ERR(dev,
1202 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1203 		       attrs->cap.max_send_sge, qattr->max_sge);
1204 		return -EINVAL;
1205 	}
1206 
1207 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1208 		DP_ERR(dev,
1209 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1210 		       attrs->cap.max_recv_sge, qattr->max_sge);
1211 		return -EINVAL;
1212 	}
1213 
1214 	/* Unprivileged user space cannot create special QP */
1215 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1216 		DP_ERR(dev,
1217 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1218 		       attrs->qp_type);
1219 		return -EINVAL;
1220 	}
1221 
1222 	return 0;
1223 }
1224 
1225 static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
1226 			       struct qedr_qp *qp)
1227 {
1228 	uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1229 	uresp->rq_icid = qp->icid;
1230 }
1231 
1232 static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
1233 			       struct qedr_qp *qp)
1234 {
1235 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1236 	uresp->sq_icid = qp->icid + 1;
1237 }
1238 
1239 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1240 			      struct qedr_qp *qp, struct ib_udata *udata)
1241 {
1242 	struct qedr_create_qp_uresp uresp;
1243 	int rc;
1244 
1245 	memset(&uresp, 0, sizeof(uresp));
1246 	qedr_copy_sq_uresp(&uresp, qp);
1247 	qedr_copy_rq_uresp(&uresp, qp);
1248 
1249 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1250 	uresp.qp_id = qp->qp_id;
1251 
1252 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1253 	if (rc)
1254 		DP_ERR(dev,
1255 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1256 		       qp->icid);
1257 
1258 	return rc;
1259 }
1260 
1261 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1262 				      struct qedr_qp *qp,
1263 				      struct qedr_pd *pd,
1264 				      struct ib_qp_init_attr *attrs)
1265 {
1266 	spin_lock_init(&qp->q_lock);
1267 	qp->pd = pd;
1268 	qp->qp_type = attrs->qp_type;
1269 	qp->max_inline_data = attrs->cap.max_inline_data;
1270 	qp->sq.max_sges = attrs->cap.max_send_sge;
1271 	qp->state = QED_ROCE_QP_STATE_RESET;
1272 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1273 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1274 	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1275 	qp->dev = dev;
1276 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1277 
1278 	DP_DEBUG(dev, QEDR_MSG_QP,
1279 		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1280 		 qp->rq.max_sges, qp->rq_cq->icid);
1281 	DP_DEBUG(dev, QEDR_MSG_QP,
1282 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1283 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1284 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1285 	DP_DEBUG(dev, QEDR_MSG_QP,
1286 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1287 		 qp->sq.max_sges, qp->sq_cq->icid);
1288 }
1289 
1290 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1291 {
1292 	qp->sq.db = dev->db_addr +
1293 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1294 	qp->sq.db_data.data.icid = qp->icid + 1;
1295 	qp->rq.db = dev->db_addr +
1296 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1297 	qp->rq.db_data.data.icid = qp->icid;
1298 }
1299 
1300 static inline void
1301 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1302 			      struct qedr_pd *pd,
1303 			      struct qedr_qp *qp,
1304 			      struct ib_qp_init_attr *attrs,
1305 			      bool fmr_and_reserved_lkey,
1306 			      struct qed_rdma_create_qp_in_params *params)
1307 {
1308 	/* QP handle to be written in an async event */
1309 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1310 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1311 
1312 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1313 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1314 	params->pd = pd->pd_id;
1315 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1316 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1317 	params->stats_queue = 0;
1318 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1319 	params->srq_id = 0;
1320 	params->use_srq = false;
1321 }
1322 
1323 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1324 {
1325 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1326 		 "qp=%p. "
1327 		 "sq_addr=0x%llx, "
1328 		 "sq_len=%zd, "
1329 		 "rq_addr=0x%llx, "
1330 		 "rq_len=%zd"
1331 		 "\n",
1332 		 qp,
1333 		 qp->usq.buf_addr,
1334 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1335 }
1336 
1337 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1338 {
1339 	if (qp->usq.umem)
1340 		ib_umem_release(qp->usq.umem);
1341 	qp->usq.umem = NULL;
1342 
1343 	if (qp->urq.umem)
1344 		ib_umem_release(qp->urq.umem);
1345 	qp->urq.umem = NULL;
1346 }
1347 
1348 static int qedr_create_user_qp(struct qedr_dev *dev,
1349 			       struct qedr_qp *qp,
1350 			       struct ib_pd *ibpd,
1351 			       struct ib_udata *udata,
1352 			       struct ib_qp_init_attr *attrs)
1353 {
1354 	struct qed_rdma_create_qp_in_params in_params;
1355 	struct qed_rdma_create_qp_out_params out_params;
1356 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1357 	struct ib_ucontext *ib_ctx = NULL;
1358 	struct qedr_ucontext *ctx = NULL;
1359 	struct qedr_create_qp_ureq ureq;
1360 	int rc = -EINVAL;
1361 
1362 	ib_ctx = ibpd->uobject->context;
1363 	ctx = get_qedr_ucontext(ib_ctx);
1364 
1365 	memset(&ureq, 0, sizeof(ureq));
1366 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1367 	if (rc) {
1368 		DP_ERR(dev, "Problem copying data from user space\n");
1369 		return rc;
1370 	}
1371 
1372 	/* SQ - read access only (0), dma sync not required (0) */
1373 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1374 				  ureq.sq_len, 0, 0);
1375 	if (rc)
1376 		return rc;
1377 
1378 	/* RQ - read access only (0), dma sync not required (0) */
1379 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1380 				  ureq.rq_len, 0, 0);
1381 
1382 	if (rc)
1383 		return rc;
1384 
1385 	memset(&in_params, 0, sizeof(in_params));
1386 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1387 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1388 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1389 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1390 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1391 	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1392 	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1393 
1394 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1395 					      &in_params, &out_params);
1396 
1397 	if (!qp->qed_qp) {
1398 		rc = -ENOMEM;
1399 		goto err1;
1400 	}
1401 
1402 	qp->qp_id = out_params.qp_id;
1403 	qp->icid = out_params.icid;
1404 
1405 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1406 	if (rc)
1407 		goto err;
1408 
1409 	qedr_qp_user_print(dev, qp);
1410 
1411 	return 0;
1412 err:
1413 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1414 	if (rc)
1415 		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1416 
1417 err1:
1418 	qedr_cleanup_user(dev, qp);
1419 	return rc;
1420 }
1421 
1422 static int
1423 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1424 			   struct qedr_qp *qp,
1425 			   struct qed_rdma_create_qp_in_params *in_params,
1426 			   u32 n_sq_elems, u32 n_rq_elems)
1427 {
1428 	struct qed_rdma_create_qp_out_params out_params;
1429 	int rc;
1430 
1431 	rc = dev->ops->common->chain_alloc(dev->cdev,
1432 					   QED_CHAIN_USE_TO_PRODUCE,
1433 					   QED_CHAIN_MODE_PBL,
1434 					   QED_CHAIN_CNT_TYPE_U32,
1435 					   n_sq_elems,
1436 					   QEDR_SQE_ELEMENT_SIZE,
1437 					   &qp->sq.pbl, NULL);
1438 
1439 	if (rc)
1440 		return rc;
1441 
1442 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1443 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1444 
1445 	rc = dev->ops->common->chain_alloc(dev->cdev,
1446 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1447 					   QED_CHAIN_MODE_PBL,
1448 					   QED_CHAIN_CNT_TYPE_U32,
1449 					   n_rq_elems,
1450 					   QEDR_RQE_ELEMENT_SIZE,
1451 					   &qp->rq.pbl, NULL);
1452 	if (rc)
1453 		return rc;
1454 
1455 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1456 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1457 
1458 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1459 					      in_params, &out_params);
1460 
1461 	if (!qp->qed_qp)
1462 		return -EINVAL;
1463 
1464 	qp->qp_id = out_params.qp_id;
1465 	qp->icid = out_params.icid;
1466 
1467 	qedr_set_roce_db_info(dev, qp);
1468 
1469 	return 0;
1470 }
1471 
1472 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1473 {
1474 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1475 	kfree(qp->wqe_wr_id);
1476 
1477 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1478 	kfree(qp->rqe_wr_id);
1479 }
1480 
1481 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1482 				 struct qedr_qp *qp,
1483 				 struct ib_pd *ibpd,
1484 				 struct ib_qp_init_attr *attrs)
1485 {
1486 	struct qed_rdma_create_qp_in_params in_params;
1487 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1488 	int rc = -EINVAL;
1489 	u32 n_rq_elems;
1490 	u32 n_sq_elems;
1491 	u32 n_sq_entries;
1492 
1493 	memset(&in_params, 0, sizeof(in_params));
1494 
1495 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1496 	 * the ring. The ring should allow at least a single WR, even if the
1497 	 * user requested none, due to allocation issues.
1498 	 * We should add an extra WR since the prod and cons indices of
1499 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1500 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1501 	 * double the number of entries due to an iSER issue that pushes far more
1502 	 * WRs than indicated. If we decline its ib_post_send() then we get
1503 	 * error prints in the dmesg we'd like to avoid.
1504 	 */
1505 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1506 			      dev->attr.max_sqe);
1507 
1508 	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
1509 				GFP_KERNEL);
1510 	if (!qp->wqe_wr_id) {
1511 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1512 		return -ENOMEM;
1513 	}
1514 
1515 	/* QP handle to be written in CQE */
1516 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1517 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1518 
1519 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1520 	 * the ring. The ring should allow at least a single WR, even if the
1521 	 * user requested none, due to allocation issues.
1522 	 */
1523 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1524 
1525 	/* Allocate driver internal RQ array */
1526 	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
1527 				GFP_KERNEL);
1528 	if (!qp->rqe_wr_id) {
1529 		DP_ERR(dev,
1530 		       "create qp: failed RQ shadow memory allocation\n");
1531 		kfree(qp->wqe_wr_id);
1532 		return -ENOMEM;
1533 	}
1534 
1535 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1536 
1537 	n_sq_entries = attrs->cap.max_send_wr;
1538 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1539 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1540 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1541 
1542 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1543 
1544 	rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1545 					n_sq_elems, n_rq_elems);
1546 	if (rc)
1547 		qedr_cleanup_kernel(dev, qp);
1548 
1549 	return rc;
1550 }
1551 
1552 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1553 			     struct ib_qp_init_attr *attrs,
1554 			     struct ib_udata *udata)
1555 {
1556 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1557 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1558 	struct qedr_qp *qp;
1559 	struct ib_qp *ibqp;
1560 	int rc = 0;
1561 
1562 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1563 		 udata ? "user library" : "kernel", pd);
1564 
1565 	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1566 	if (rc)
1567 		return ERR_PTR(rc);
1568 
1569 	if (attrs->srq)
1570 		return ERR_PTR(-EINVAL);
1571 
1572 	DP_DEBUG(dev, QEDR_MSG_QP,
1573 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1574 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1575 		 get_qedr_cq(attrs->send_cq),
1576 		 get_qedr_cq(attrs->send_cq)->icid,
1577 		 get_qedr_cq(attrs->recv_cq),
1578 		 get_qedr_cq(attrs->recv_cq)->icid);
1579 
1580 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1581 	if (!qp) {
1582 		DP_ERR(dev, "create qp: failed allocating memory\n");
1583 		return ERR_PTR(-ENOMEM);
1584 	}
1585 
1586 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1587 
1588 	if (attrs->qp_type == IB_QPT_GSI) {
1589 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1590 		if (IS_ERR(ibqp))
1591 			kfree(qp);
1592 		return ibqp;
1593 	}
1594 
1595 	if (udata)
1596 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1597 	else
1598 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1599 
1600 	if (rc)
1601 		goto err;
1602 
1603 	qp->ibqp.qp_num = qp->qp_id;
1604 
1605 	return &qp->ibqp;
1606 
1607 err:
1608 	kfree(qp);
1609 
1610 	return ERR_PTR(-EFAULT);
1611 }
1612 
1613 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1614 {
1615 	switch (qp_state) {
1616 	case QED_ROCE_QP_STATE_RESET:
1617 		return IB_QPS_RESET;
1618 	case QED_ROCE_QP_STATE_INIT:
1619 		return IB_QPS_INIT;
1620 	case QED_ROCE_QP_STATE_RTR:
1621 		return IB_QPS_RTR;
1622 	case QED_ROCE_QP_STATE_RTS:
1623 		return IB_QPS_RTS;
1624 	case QED_ROCE_QP_STATE_SQD:
1625 		return IB_QPS_SQD;
1626 	case QED_ROCE_QP_STATE_ERR:
1627 		return IB_QPS_ERR;
1628 	case QED_ROCE_QP_STATE_SQE:
1629 		return IB_QPS_SQE;
1630 	}
1631 	return IB_QPS_ERR;
1632 }
1633 
1634 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1635 					enum ib_qp_state qp_state)
1636 {
1637 	switch (qp_state) {
1638 	case IB_QPS_RESET:
1639 		return QED_ROCE_QP_STATE_RESET;
1640 	case IB_QPS_INIT:
1641 		return QED_ROCE_QP_STATE_INIT;
1642 	case IB_QPS_RTR:
1643 		return QED_ROCE_QP_STATE_RTR;
1644 	case IB_QPS_RTS:
1645 		return QED_ROCE_QP_STATE_RTS;
1646 	case IB_QPS_SQD:
1647 		return QED_ROCE_QP_STATE_SQD;
1648 	case IB_QPS_ERR:
1649 		return QED_ROCE_QP_STATE_ERR;
1650 	default:
1651 		return QED_ROCE_QP_STATE_ERR;
1652 	}
1653 }
1654 
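/* Reset the SW shadow of a work queue (chain, producer/consumer indices and
 * doorbell data); used when the QP moves from RESET to INIT.
 */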
1655 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1656 {
1657 	qed_chain_reset(&qph->pbl);
1658 	qph->prod = 0;
1659 	qph->cons = 0;
1660 	qph->wqe_cons = 0;
1661 	qph->db_data.data.value = cpu_to_le16(0);
1662 }
1663 
1664 static int qedr_update_qp_state(struct qedr_dev *dev,
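/* Validate the requested qed QP state transition and perform the driver-side
 * work it implies: reset the SQ/RQ shadows on RESET->INIT and ring the RQ
 * doorbell on INIT->RTR. Returns -EINVAL for unsupported transitions.
 */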
1665 				struct qedr_qp *qp,
1666 				enum qed_roce_qp_state new_state)
1667 {
1668 	int status = 0;
1669 
1670 	if (new_state == qp->state)
1671 		return 0;
1672 
1673 	switch (qp->state) {
1674 	case QED_ROCE_QP_STATE_RESET:
1675 		switch (new_state) {
1676 		case QED_ROCE_QP_STATE_INIT:
1677 			qp->prev_wqe_size = 0;
1678 			qedr_reset_qp_hwq_info(&qp->sq);
1679 			qedr_reset_qp_hwq_info(&qp->rq);
1680 			break;
1681 		default:
1682 			status = -EINVAL;
1683 			break;
1684 		}
1685 		break;
1686 	case QED_ROCE_QP_STATE_INIT:
1687 		switch (new_state) {
1688 		case QED_ROCE_QP_STATE_RTR:
1689 			/* Update doorbell (in case post_recv was
1690 			 * done before move to RTR)
1691 			 */
1692 			wmb();
1693 			writel(qp->rq.db_data.raw, qp->rq.db);
1694 			/* Make sure write takes effect */
1695 			mmiowb();
1696 			break;
1697 		case QED_ROCE_QP_STATE_ERR:
1698 			break;
1699 		default:
1700 			/* Invalid state change. */
1701 			status = -EINVAL;
1702 			break;
1703 		}
1704 		break;
1705 	case QED_ROCE_QP_STATE_RTR:
1706 		/* RTR->XXX */
1707 		switch (new_state) {
1708 		case QED_ROCE_QP_STATE_RTS:
1709 			break;
1710 		case QED_ROCE_QP_STATE_ERR:
1711 			break;
1712 		default:
1713 			/* Invalid state change. */
1714 			status = -EINVAL;
1715 			break;
1716 		}
1717 		break;
1718 	case QED_ROCE_QP_STATE_RTS:
1719 		/* RTS->XXX */
1720 		switch (new_state) {
1721 		case QED_ROCE_QP_STATE_SQD:
1722 			break;
1723 		case QED_ROCE_QP_STATE_ERR:
1724 			break;
1725 		default:
1726 			/* Invalid state change. */
1727 			status = -EINVAL;
1728 			break;
1729 		}
1730 		break;
1731 	case QED_ROCE_QP_STATE_SQD:
1732 		/* SQD->XXX */
1733 		switch (new_state) {
1734 		case QED_ROCE_QP_STATE_RTS:
1735 		case QED_ROCE_QP_STATE_ERR:
1736 			break;
1737 		default:
1738 			/* Invalid state change. */
1739 			status = -EINVAL;
1740 			break;
1741 		}
1742 		break;
1743 	case QED_ROCE_QP_STATE_ERR:
1744 		/* ERR->XXX */
1745 		switch (new_state) {
1746 		case QED_ROCE_QP_STATE_RESET:
1747 			if ((qp->rq.prod != qp->rq.cons) ||
1748 			    (qp->sq.prod != qp->sq.cons)) {
1749 				DP_NOTICE(dev,
1750 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1751 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1752 					  qp->sq.cons);
1753 				status = -EINVAL;
1754 			}
1755 			break;
1756 		default:
1757 			status = -EINVAL;
1758 			break;
1759 		}
1760 		break;
1761 	default:
1762 		status = -EINVAL;
1763 		break;
1764 	}
1765 
1766 	return status;
1767 }
1768 
1769 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1770 		   int attr_mask, struct ib_udata *udata)
1771 {
1772 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1773 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1774 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1775 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1776 	enum ib_qp_state old_qp_state, new_qp_state;
1777 	int rc = 0;
1778 
1779 	DP_DEBUG(dev, QEDR_MSG_QP,
1780 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
1781 		 attr->qp_state);
1782 
1783 	old_qp_state = qedr_get_ibqp_state(qp->state);
1784 	if (attr_mask & IB_QP_STATE)
1785 		new_qp_state = attr->qp_state;
1786 	else
1787 		new_qp_state = old_qp_state;
1788 
1789 	if (!ib_modify_qp_is_ok
1790 	    (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
1791 	     IB_LINK_LAYER_ETHERNET)) {
1792 		DP_ERR(dev,
1793 		       "modify qp: invalid attribute mask=0x%x specified for\n"
1794 		       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1795 		       attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
1796 		       new_qp_state);
1797 		rc = -EINVAL;
1798 		goto err;
1799 	}
1800 
1801 	/* Translate the masks... */
1802 	if (attr_mask & IB_QP_STATE) {
1803 		SET_FIELD(qp_params.modify_flags,
1804 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1805 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1806 	}
1807 
1808 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1809 		qp_params.sqd_async = true;
1810 
1811 	if (attr_mask & IB_QP_PKEY_INDEX) {
1812 		SET_FIELD(qp_params.modify_flags,
1813 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1814 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1815 			rc = -EINVAL;
1816 			goto err;
1817 		}
1818 
1819 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1820 	}
1821 
1822 	if (attr_mask & IB_QP_QKEY)
1823 		qp->qkey = attr->qkey;
1824 
1825 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1826 		SET_FIELD(qp_params.modify_flags,
1827 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1828 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1829 						  IB_ACCESS_REMOTE_READ;
1830 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1831 						   IB_ACCESS_REMOTE_WRITE;
1832 		qp_params.incoming_atomic_en = attr->qp_access_flags &
1833 					       IB_ACCESS_REMOTE_ATOMIC;
1834 	}
1835 
1836 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1837 		if (attr_mask & IB_QP_PATH_MTU) {
1838 			if (attr->path_mtu < IB_MTU_256 ||
1839 			    attr->path_mtu > IB_MTU_4096) {
1840 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1841 				rc = -EINVAL;
1842 				goto err;
1843 			}
1844 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1845 				      ib_mtu_enum_to_int(iboe_get_mtu
1846 							 (dev->ndev->mtu)));
1847 		}
1848 
1849 		if (!qp->mtu) {
1850 			qp->mtu =
1851 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1852 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1853 		}
1854 
1855 		SET_FIELD(qp_params.modify_flags,
1856 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1857 
1858 		qp_params.traffic_class_tos = grh->traffic_class;
1859 		qp_params.flow_label = grh->flow_label;
1860 		qp_params.hop_limit_ttl = grh->hop_limit;
1861 
1862 		qp->sgid_idx = grh->sgid_index;
1863 
1864 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1865 		if (rc) {
1866 			DP_ERR(dev,
1867 			       "modify qp: problems with GID index %d (rc=%d)\n",
1868 			       grh->sgid_index, rc);
1869 			return rc;
1870 		}
1871 
1872 		rc = qedr_get_dmac(dev, &attr->ah_attr,
1873 				   qp_params.remote_mac_addr);
1874 		if (rc)
1875 			return rc;
1876 
1877 		qp_params.use_local_mac = true;
1878 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
1879 
1880 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
1881 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
1882 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
1883 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
1884 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
1885 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
1886 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
1887 			 qp_params.remote_mac_addr);
1888 
1889 		qp_params.mtu = qp->mtu;
1890 		qp_params.lb_indication = false;
1891 	}
1892 
1893 	if (!qp_params.mtu) {
1894 		/* Stay with current MTU */
1895 		if (qp->mtu)
1896 			qp_params.mtu = qp->mtu;
1897 		else
1898 			qp_params.mtu =
1899 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1900 	}
1901 
1902 	if (attr_mask & IB_QP_TIMEOUT) {
1903 		SET_FIELD(qp_params.modify_flags,
1904 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
1905 
1906 		qp_params.ack_timeout = attr->timeout;
1907 		if (attr->timeout) {
1908 			u32 temp;
1909 
1910 			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
1911 			/* FW requires [msec] */
1912 			qp_params.ack_timeout = temp;
1913 		} else {
1914 			/* Infinite */
1915 			qp_params.ack_timeout = 0;
1916 		}
1917 	}
1918 	if (attr_mask & IB_QP_RETRY_CNT) {
1919 		SET_FIELD(qp_params.modify_flags,
1920 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
1921 		qp_params.retry_cnt = attr->retry_cnt;
1922 	}
1923 
1924 	if (attr_mask & IB_QP_RNR_RETRY) {
1925 		SET_FIELD(qp_params.modify_flags,
1926 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
1927 		qp_params.rnr_retry_cnt = attr->rnr_retry;
1928 	}
1929 
1930 	if (attr_mask & IB_QP_RQ_PSN) {
1931 		SET_FIELD(qp_params.modify_flags,
1932 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
1933 		qp_params.rq_psn = attr->rq_psn;
1934 		qp->rq_psn = attr->rq_psn;
1935 	}
1936 
1937 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1938 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
1939 			rc = -EINVAL;
1940 			DP_ERR(dev,
1941 			       "unsupported max_rd_atomic=%d, supported=%d\n",
1942 			       attr->max_rd_atomic,
1943 			       dev->attr.max_qp_req_rd_atomic_resc);
1944 			goto err;
1945 		}
1946 
1947 		SET_FIELD(qp_params.modify_flags,
1948 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
1949 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
1950 	}
1951 
1952 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1953 		SET_FIELD(qp_params.modify_flags,
1954 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
1955 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
1956 	}
1957 
1958 	if (attr_mask & IB_QP_SQ_PSN) {
1959 		SET_FIELD(qp_params.modify_flags,
1960 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
1961 		qp_params.sq_psn = attr->sq_psn;
1962 		qp->sq_psn = attr->sq_psn;
1963 	}
1964 
1965 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1966 		if (attr->max_dest_rd_atomic >
1967 		    dev->attr.max_qp_resp_rd_atomic_resc) {
1968 			DP_ERR(dev,
1969 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
1970 			       attr->max_dest_rd_atomic,
1971 			       dev->attr.max_qp_resp_rd_atomic_resc);
1972 
1973 			rc = -EINVAL;
1974 			goto err;
1975 		}
1976 
1977 		SET_FIELD(qp_params.modify_flags,
1978 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
1979 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
1980 	}
1981 
1982 	if (attr_mask & IB_QP_DEST_QPN) {
1983 		SET_FIELD(qp_params.modify_flags,
1984 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
1985 
1986 		qp_params.dest_qp = attr->dest_qp_num;
1987 		qp->dest_qp_num = attr->dest_qp_num;
1988 	}
1989 
1990 	if (qp->qp_type != IB_QPT_GSI)
1991 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
1992 					      qp->qed_qp, &qp_params);
1993 
1994 	if (attr_mask & IB_QP_STATE) {
1995 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
1996 			rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
1997 		qp->state = qp_params.new_state;
1998 	}
1999 
2000 err:
2001 	return rc;
2002 }
2003 
2004 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2005 {
2006 	int ib_qp_acc_flags = 0;
2007 
2008 	if (params->incoming_rdma_write_en)
2009 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2010 	if (params->incoming_rdma_read_en)
2011 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2012 	if (params->incoming_atomic_en)
2013 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2014 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2015 	return ib_qp_acc_flags;
2016 }
2017 
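/* Query the QP from the qed core and translate the result into the
 * ib_qp_attr / ib_qp_init_attr layout expected by the verbs layer.
 */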
2018 int qedr_query_qp(struct ib_qp *ibqp,
2019 		  struct ib_qp_attr *qp_attr,
2020 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2021 {
2022 	struct qed_rdma_query_qp_out_params params;
2023 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2024 	struct qedr_dev *dev = qp->dev;
2025 	int rc = 0;
2026 
2027 	memset(&params, 0, sizeof(params));
2028 
2029 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2030 	if (rc)
2031 		goto err;
2032 
2033 	memset(qp_attr, 0, sizeof(*qp_attr));
2034 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2035 
2036 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2037 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2038 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2039 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2040 	qp_attr->rq_psn = params.rq_psn;
2041 	qp_attr->sq_psn = params.sq_psn;
2042 	qp_attr->dest_qp_num = params.dest_qp;
2043 
2044 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2045 
2046 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2047 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2048 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2049 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2050 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2051 	qp_init_attr->cap = qp_attr->cap;
2052 
2053 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2054 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2055 			params.flow_label, qp->sgid_idx,
2056 			params.hop_limit_ttl, params.traffic_class_tos);
2057 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2058 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2059 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2060 	qp_attr->timeout = params.timeout;
2061 	qp_attr->rnr_retry = params.rnr_retry;
2062 	qp_attr->retry_cnt = params.retry_cnt;
2063 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2064 	qp_attr->pkey_index = params.pkey_index;
2065 	qp_attr->port_num = 1;
2066 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2067 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2068 	qp_attr->alt_pkey_index = 0;
2069 	qp_attr->alt_port_num = 0;
2070 	qp_attr->alt_timeout = 0;
2071 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2072 
2073 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2074 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2075 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2076 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2077 
2078 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2079 		 qp_attr->cap.max_inline_data);
2080 
2081 err:
2082 	return rc;
2083 }
2084 
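/* Destroy the qed QP (except for GSI QPs) and release the user- or
 * kernel-space queue resources attached to it.
 */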
2085 int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2086 {
2087 	int rc = 0;
2088 
2089 	if (qp->qp_type != IB_QPT_GSI) {
2090 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2091 		if (rc)
2092 			return rc;
2093 	}
2094 
2095 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2096 		qedr_cleanup_user(dev, qp);
2097 	else
2098 		qedr_cleanup_kernel(dev, qp);
2099 
2100 	return 0;
2101 }
2102 
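/* Move the QP to the ERROR state if it is still operational, tear down the
 * GSI resources for GSI QPs, free the queue resources and release the
 * qedr_qp structure.
 */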
2103 int qedr_destroy_qp(struct ib_qp *ibqp)
2104 {
2105 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2106 	struct qedr_dev *dev = qp->dev;
2107 	struct ib_qp_attr attr;
2108 	int attr_mask = 0;
2109 	int rc = 0;
2110 
2111 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2112 		 qp, qp->qp_type);
2113 
2114 	if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2115 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2116 	    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2117 
2118 		attr.qp_state = IB_QPS_ERR;
2119 		attr_mask |= IB_QP_STATE;
2120 
2121 		/* Change the QP state to ERROR */
2122 		qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2123 	}
2124 
2125 	if (qp->qp_type == IB_QPT_GSI)
2126 		qedr_destroy_gsi_qp(dev);
2127 
2128 	qedr_free_qp_resources(dev, qp);
2129 
2130 	kfree(qp);
2131 
2132 	return rc;
2133 }
2134 
2135 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2136 			     struct ib_udata *udata)
2137 {
2138 	struct qedr_ah *ah;
2139 
2140 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2141 	if (!ah)
2142 		return ERR_PTR(-ENOMEM);
2143 
2144 	ah->attr = *attr;
2145 
2146 	return &ah->ibah;
2147 }
2148 
2149 int qedr_destroy_ah(struct ib_ah *ibah)
2150 {
2151 	struct qedr_ah *ah = get_qedr_ah(ibah);
2152 
2153 	kfree(ah);
2154 	return 0;
2155 }
2156 
2157 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2158 {
2159 	struct qedr_pbl *pbl, *tmp;
2160 
2161 	if (info->pbl_table)
2162 		list_add_tail(&info->pbl_table->list_entry,
2163 			      &info->free_pbl_list);
2164 
2165 	if (!list_empty(&info->inuse_pbl_list))
2166 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2167 
2168 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2169 		list_del(&pbl->list_entry);
2170 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2171 	}
2172 }
2173 
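/* Set up the PBL bookkeeping for an MR: compute the PBL layout for
 * page_list_len pages and pre-allocate two PBL tables (one in use, one on
 * the free list), since two are used in the usual case.
 */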
2174 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2175 			size_t page_list_len, bool two_layered)
2176 {
2177 	struct qedr_pbl *tmp;
2178 	int rc;
2179 
2180 	INIT_LIST_HEAD(&info->free_pbl_list);
2181 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2182 
2183 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2184 				  page_list_len, two_layered);
2185 	if (rc)
2186 		goto done;
2187 
2188 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2189 	if (IS_ERR(info->pbl_table)) {
2190 		rc = PTR_ERR(info->pbl_table);
2191 		goto done;
2192 	}
2193 
2194 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2195 		 &info->pbl_table->pa);
2196 
2197 	/* In the usual case we use 2 PBLs, so we add one to the free
2198 	 * list and allocate another one.
2199 	 */
2200 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2201 	if (IS_ERR(tmp)) {
2202 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2203 		goto done;
2204 	}
2205 
2206 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2207 
2208 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2209 
2210 done:
2211 	if (rc)
2212 		free_mr_info(dev, info);
2213 
2214 	return rc;
2215 }
2216 
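/* Register a user memory region: pin the pages with ib_umem_get(), build
 * the PBLs describing them and register a TID with the qed core. The
 * lkey/rkey are derived from the allocated itid.
 */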
2217 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2218 			       u64 usr_addr, int acc, struct ib_udata *udata)
2219 {
2220 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2221 	struct qedr_mr *mr;
2222 	struct qedr_pd *pd;
2223 	int rc = -ENOMEM;
2224 
2225 	pd = get_qedr_pd(ibpd);
2226 	DP_DEBUG(dev, QEDR_MSG_MR,
2227 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2228 		 pd->pd_id, start, len, usr_addr, acc);
2229 
2230 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2231 		return ERR_PTR(-EINVAL);
2232 
2233 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2234 	if (!mr)
2235 		return ERR_PTR(rc);
2236 
2237 	mr->type = QEDR_MR_USER;
2238 
2239 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2240 	if (IS_ERR(mr->umem)) {
2241 		rc = -EFAULT;
2242 		goto err0;
2243 	}
2244 
2245 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2246 	if (rc)
2247 		goto err1;
2248 
2249 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2250 			   &mr->info.pbl_info, mr->umem->page_shift);
2251 
2252 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2253 	if (rc) {
2254 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2255 		goto err1;
2256 	}
2257 
2258 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2259 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2260 	mr->hw_mr.key = 0;
2261 	mr->hw_mr.pd = pd->pd_id;
2262 	mr->hw_mr.local_read = 1;
2263 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2264 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2265 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2266 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2267 	mr->hw_mr.mw_bind = false;
2268 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2269 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2270 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2271 	mr->hw_mr.page_size_log = mr->umem->page_shift;
2272 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2273 	mr->hw_mr.length = len;
2274 	mr->hw_mr.vaddr = usr_addr;
2275 	mr->hw_mr.zbva = false;
2276 	mr->hw_mr.phy_mr = false;
2277 	mr->hw_mr.dma_mr = false;
2278 
2279 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2280 	if (rc) {
2281 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2282 		goto err2;
2283 	}
2284 
2285 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2286 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2287 	    mr->hw_mr.remote_atomic)
2288 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2289 
2290 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2291 		 mr->ibmr.lkey);
2292 	return &mr->ibmr;
2293 
2294 err2:
2295 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2296 err1:
2297 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2298 err0:
2299 	kfree(mr);
2300 	return ERR_PTR(rc);
2301 }
2302 
2303 int qedr_dereg_mr(struct ib_mr *ib_mr)
2304 {
2305 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2306 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2307 	int rc = 0;
2308 
2309 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2310 	if (rc)
2311 		return rc;
2312 
2313 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2314 
2315 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2316 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2317 
2318 	/* it could be user registered memory. */
2319 	if (mr->umem)
2320 		ib_umem_release(mr->umem);
2321 
2322 	kfree(mr);
2323 
2324 	return rc;
2325 }
2326 
2327 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2328 				       int max_page_list_len)
2329 {
2330 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2331 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2332 	struct qedr_mr *mr;
2333 	int rc = -ENOMEM;
2334 
2335 	DP_DEBUG(dev, QEDR_MSG_MR,
2336 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2337 		 max_page_list_len);
2338 
2339 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2340 	if (!mr)
2341 		return ERR_PTR(rc);
2342 
2343 	mr->dev = dev;
2344 	mr->type = QEDR_MR_FRMR;
2345 
2346 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2347 	if (rc)
2348 		goto err0;
2349 
2350 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2351 	if (rc) {
2352 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2353 		goto err0;
2354 	}
2355 
2356 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2357 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2358 	mr->hw_mr.key = 0;
2359 	mr->hw_mr.pd = pd->pd_id;
2360 	mr->hw_mr.local_read = 1;
2361 	mr->hw_mr.local_write = 0;
2362 	mr->hw_mr.remote_read = 0;
2363 	mr->hw_mr.remote_write = 0;
2364 	mr->hw_mr.remote_atomic = 0;
2365 	mr->hw_mr.mw_bind = false;
2366 	mr->hw_mr.pbl_ptr = 0;
2367 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2368 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2369 	mr->hw_mr.fbo = 0;
2370 	mr->hw_mr.length = 0;
2371 	mr->hw_mr.vaddr = 0;
2372 	mr->hw_mr.zbva = false;
2373 	mr->hw_mr.phy_mr = true;
2374 	mr->hw_mr.dma_mr = false;
2375 
2376 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2377 	if (rc) {
2378 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2379 		goto err1;
2380 	}
2381 
2382 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2383 	mr->ibmr.rkey = mr->ibmr.lkey;
2384 
2385 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2386 	return mr;
2387 
2388 err1:
2389 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2390 err0:
2391 	kfree(mr);
2392 	return ERR_PTR(rc);
2393 }
2394 
2395 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2396 			    enum ib_mr_type mr_type, u32 max_num_sg)
2397 {
2398 	struct qedr_dev *dev;
2399 	struct qedr_mr *mr;
2400 
2401 	if (mr_type != IB_MR_TYPE_MEM_REG)
2402 		return ERR_PTR(-EINVAL);
2403 
2404 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2405 
2406 	if (IS_ERR(mr))
2407 		return ERR_PTR(-EINVAL);
2408 
2409 	dev = mr->dev;
2410 
2411 	return &mr->ibmr;
2412 }
2413 
2414 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2415 {
2416 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2417 	struct qedr_pbl *pbl_table;
2418 	struct regpair *pbe;
2419 	u32 pbes_in_page;
2420 
2421 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2422 		DP_ERR(mr->dev, "qedr_set_page failed, npages=%d\n", mr->npages);
2423 		return -ENOMEM;
2424 	}
2425 
2426 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2427 		 mr->npages, addr);
2428 
2429 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2430 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2431 	pbe = (struct regpair *)pbl_table->va;
2432 	pbe +=  mr->npages % pbes_in_page;
2433 	pbe->lo = cpu_to_le32((u32)addr);
2434 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2435 
2436 	mr->npages++;
2437 
2438 	return 0;
2439 }
2440 
2441 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2442 {
2443 	int work = info->completed - info->completed_handled - 1;
2444 
2445 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2446 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2447 		struct qedr_pbl *pbl;
2448 
2449 		/* Free all the page lists that can be freed (all the ones
2450 		 * that were invalidated), under the assumption that if an FMR
2451 		 * completed successfully, any invalidate operation issued
2452 		 * before it has completed as well.
2453 		 */
2454 		pbl = list_first_entry(&info->inuse_pbl_list,
2455 				       struct qedr_pbl, list_entry);
2456 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2457 		info->completed_handled++;
2458 	}
2459 }
2460 
2461 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2462 		   int sg_nents, unsigned int *sg_offset)
2463 {
2464 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2465 
2466 	mr->npages = 0;
2467 
2468 	handle_completed_mrs(mr->dev, &mr->info);
2469 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2470 }
2471 
2472 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2473 {
2474 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2475 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2476 	struct qedr_mr *mr;
2477 	int rc;
2478 
2479 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2480 	if (!mr)
2481 		return ERR_PTR(-ENOMEM);
2482 
2483 	mr->type = QEDR_MR_DMA;
2484 
2485 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2486 	if (rc) {
2487 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2488 		goto err1;
2489 	}
2490 
2491 	/* index only, 18 bit long, lkey = itid << 8 | key */
2492 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2493 	mr->hw_mr.pd = pd->pd_id;
2494 	mr->hw_mr.local_read = 1;
2495 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2496 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2497 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2498 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2499 	mr->hw_mr.dma_mr = true;
2500 
2501 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2502 	if (rc) {
2503 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2504 		goto err2;
2505 	}
2506 
2507 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2508 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2509 	    mr->hw_mr.remote_atomic)
2510 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2511 
2512 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2513 	return &mr->ibmr;
2514 
2515 err2:
2516 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2517 err1:
2518 	kfree(mr);
2519 	return ERR_PTR(rc);
2520 }
2521 
2522 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2523 {
2524 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2525 }
2526 
2527 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2528 {
2529 	int i, len = 0;
2530 
2531 	for (i = 0; i < num_sge; i++)
2532 		len += sg_list[i].length;
2533 
2534 	return len;
2535 }
2536 
2537 static void swap_wqe_data64(u64 *p)
2538 {
2539 	int i;
2540 
2541 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2542 		*p = cpu_to_be64(cpu_to_le64(*p));
2543 }
2544 
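/* Copy the payload of an inline WR directly into SQ WQE segments,
 * producing additional chain elements as needed and swapping each
 * completed segment via swap_wqe_data64(). Returns the total data size;
 * if it exceeds ROCE_REQ_MAX_INLINE_DATA_SIZE, *bad_wr is set and 0 is
 * returned.
 */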
2545 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2546 				       struct qedr_qp *qp, u8 *wqe_size,
2547 				       struct ib_send_wr *wr,
2548 				       struct ib_send_wr **bad_wr, u8 *bits,
2549 				       u8 bit)
2550 {
2551 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2552 	char *seg_prt, *wqe;
2553 	int i, seg_siz;
2554 
2555 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2556 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2557 		*bad_wr = wr;
2558 		return 0;
2559 	}
2560 
2561 	if (!data_size)
2562 		return data_size;
2563 
2564 	*bits |= bit;
2565 
2566 	seg_prt = NULL;
2567 	wqe = NULL;
2568 	seg_siz = 0;
2569 
2570 	/* Copy data inline */
2571 	for (i = 0; i < wr->num_sge; i++) {
2572 		u32 len = wr->sg_list[i].length;
2573 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2574 
2575 		while (len > 0) {
2576 			u32 cur;
2577 
2578 			/* New segment required */
2579 			if (!seg_siz) {
2580 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2581 				seg_prt = wqe;
2582 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2583 				(*wqe_size)++;
2584 			}
2585 
2586 			/* Calculate currently allowed length */
2587 			cur = min_t(u32, len, seg_siz);
2588 			memcpy(seg_prt, src, cur);
2589 
2590 			/* Update segment variables */
2591 			seg_prt += cur;
2592 			seg_siz -= cur;
2593 
2594 			/* Update sge variables */
2595 			src += cur;
2596 			len -= cur;
2597 
2598 			/* Swap fully-completed segments */
2599 			if (!seg_siz)
2600 				swap_wqe_data64((u64 *)wqe);
2601 		}
2602 	}
2603 
2604 	/* Swap the last, partially filled segment */
2605 	if (seg_siz)
2606 		swap_wqe_data64((u64 *)wqe);
2607 
2608 	return data_size;
2609 }
2610 
2611 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
2612 	do {							\
2613 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2614 		(sge)->length = cpu_to_le32(vlength);		\
2615 		(sge)->flags = cpu_to_le32(vflags);		\
2616 	} while (0)
2617 
2618 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
2619 	do {							\
2620 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
2621 		(hdr)->num_sges = num_sge;			\
2622 	} while (0)
2623 
2624 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
2625 	do {							\
2626 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2627 		(sge)->length = cpu_to_le32(vlength);		\
2628 		(sge)->l_key = cpu_to_le32(vlkey);		\
2629 	} while (0)
2630 
2631 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2632 				struct ib_send_wr *wr)
2633 {
2634 	u32 data_size = 0;
2635 	int i;
2636 
2637 	for (i = 0; i < wr->num_sge; i++) {
2638 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2639 
2640 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2641 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2642 		sge->length = cpu_to_le32(wr->sg_list[i].length);
2643 		data_size += wr->sg_list[i].length;
2644 	}
2645 
2646 	if (wqe_size)
2647 		*wqe_size += wr->num_sge;
2648 
2649 	return data_size;
2650 }
2651 
2652 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2653 				     struct qedr_qp *qp,
2654 				     struct rdma_sq_rdma_wqe_1st *rwqe,
2655 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2656 				     struct ib_send_wr *wr,
2657 				     struct ib_send_wr **bad_wr)
2658 {
2659 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2660 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2661 
2662 	if (wr->send_flags & IB_SEND_INLINE &&
2663 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2664 	     wr->opcode == IB_WR_RDMA_WRITE)) {
2665 		u8 flags = 0;
2666 
2667 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2668 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2669 						   bad_wr, &rwqe->flags, flags);
2670 	}
2671 
2672 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2673 }
2674 
2675 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2676 				     struct qedr_qp *qp,
2677 				     struct rdma_sq_send_wqe_1st *swqe,
2678 				     struct rdma_sq_send_wqe_2st *swqe2,
2679 				     struct ib_send_wr *wr,
2680 				     struct ib_send_wr **bad_wr)
2681 {
2682 	memset(swqe2, 0, sizeof(*swqe2));
2683 	if (wr->send_flags & IB_SEND_INLINE) {
2684 		u8 flags = 0;
2685 
2686 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2687 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2688 						   bad_wr, &swqe->flags, flags);
2689 	}
2690 
2691 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2692 }
2693 
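/* Build the fast-register (IB_WR_REG_MR) WQE: fill the MR address and key
 * in the first part, and the access flags, page size, length and PBL
 * address of the MR being registered in the second part.
 */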
2694 static int qedr_prepare_reg(struct qedr_qp *qp,
2695 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2696 			    struct ib_reg_wr *wr)
2697 {
2698 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2699 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2700 
2701 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2702 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2703 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2704 	fwqe1->l_key = wr->key;
2705 
2706 	fwqe2->access_ctrl = 0;
2707 
2708 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2709 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2710 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2711 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2712 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2713 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2714 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2715 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2716 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2717 	fwqe2->fmr_ctrl = 0;
2718 
2719 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2720 		   ilog2(mr->ibmr.page_size) - 12);
2721 
2722 	fwqe2->length_hi = 0;
2723 	fwqe2->length_lo = mr->ibmr.length;
2724 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2725 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2726 
2727 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2728 
2729 	return 0;
2730 }
2731 
2732 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2733 {
2734 	switch (opcode) {
2735 	case IB_WR_RDMA_WRITE:
2736 	case IB_WR_RDMA_WRITE_WITH_IMM:
2737 		return IB_WC_RDMA_WRITE;
2738 	case IB_WR_SEND_WITH_IMM:
2739 	case IB_WR_SEND:
2740 	case IB_WR_SEND_WITH_INV:
2741 		return IB_WC_SEND;
2742 	case IB_WR_RDMA_READ:
2743 		return IB_WC_RDMA_READ;
2744 	case IB_WR_ATOMIC_CMP_AND_SWP:
2745 		return IB_WC_COMP_SWAP;
2746 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2747 		return IB_WC_FETCH_ADD;
2748 	case IB_WR_REG_MR:
2749 		return IB_WC_REG_MR;
2750 	case IB_WR_LOCAL_INV:
2751 		return IB_WC_LOCAL_INV;
2752 	default:
2753 		return IB_WC_SEND;
2754 	}
2755 }
2756 
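/* Check that the SQ and its PBL have room for another WQE and that the WR
 * does not exceed the supported number of SGEs; each failure reason is
 * reported only once per QP via qp->err_bitmap.
 */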
2757 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2758 {
2759 	int wq_is_full, err_wr, pbl_is_full;
2760 	struct qedr_dev *dev = qp->dev;
2761 
2762 	/* prevent SQ overflow and/or processing of a bad WR */
2763 	err_wr = wr->num_sge > qp->sq.max_sges;
2764 	wq_is_full = qedr_wq_is_full(&qp->sq);
2765 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2766 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2767 	if (wq_is_full || err_wr || pbl_is_full) {
2768 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2769 			DP_ERR(dev,
2770 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2771 			       qp);
2772 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2773 		}
2774 
2775 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2776 			DP_ERR(dev,
2777 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2778 			       qp);
2779 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2780 		}
2781 
2782 		if (pbl_is_full &&
2783 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2784 			DP_ERR(dev,
2785 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2786 			       qp);
2787 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2788 		}
2789 		return false;
2790 	}
2791 	return true;
2792 }
2793 
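/* Build a single SQ WQE for the given WR. If the WR is rejected, the SQ
 * producer and prev_wqe_size are restored so the chain is left as it was
 * before the WR was processed.
 */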
2794 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2795 		     struct ib_send_wr **bad_wr)
2796 {
2797 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2798 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2799 	struct rdma_sq_atomic_wqe_1st *awqe1;
2800 	struct rdma_sq_atomic_wqe_2nd *awqe2;
2801 	struct rdma_sq_atomic_wqe_3rd *awqe3;
2802 	struct rdma_sq_send_wqe_2st *swqe2;
2803 	struct rdma_sq_local_inv_wqe *iwqe;
2804 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2805 	struct rdma_sq_send_wqe_1st *swqe;
2806 	struct rdma_sq_rdma_wqe_1st *rwqe;
2807 	struct rdma_sq_fmr_wqe_1st *fwqe1;
2808 	struct rdma_sq_common_wqe *wqe;
2809 	u32 length;
2810 	int rc = 0;
2811 	bool comp;
2812 
2813 	if (!qedr_can_post_send(qp, wr)) {
2814 		*bad_wr = wr;
2815 		return -ENOMEM;
2816 	}
2817 
2818 	wqe = qed_chain_produce(&qp->sq.pbl);
2819 	qp->wqe_wr_id[qp->sq.prod].signaled =
2820 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2821 
2822 	wqe->flags = 0;
2823 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2824 		   !!(wr->send_flags & IB_SEND_SOLICITED));
2825 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2826 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2827 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2828 		   !!(wr->send_flags & IB_SEND_FENCE));
2829 	wqe->prev_wqe_size = qp->prev_wqe_size;
2830 
2831 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2832 
2833 	switch (wr->opcode) {
2834 	case IB_WR_SEND_WITH_IMM:
2835 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
2836 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2837 		swqe->wqe_size = 2;
2838 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2839 
2840 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
2841 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2842 						   wr, bad_wr);
2843 		swqe->length = cpu_to_le32(length);
2844 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2845 		qp->prev_wqe_size = swqe->wqe_size;
2846 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2847 		break;
2848 	case IB_WR_SEND:
2849 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
2850 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2851 
2852 		swqe->wqe_size = 2;
2853 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2854 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2855 						   wr, bad_wr);
2856 		swqe->length = cpu_to_le32(length);
2857 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2858 		qp->prev_wqe_size = swqe->wqe_size;
2859 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2860 		break;
2861 	case IB_WR_SEND_WITH_INV:
2862 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
2863 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2864 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2865 		swqe->wqe_size = 2;
2866 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
2867 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2868 						   wr, bad_wr);
2869 		swqe->length = cpu_to_le32(length);
2870 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2871 		qp->prev_wqe_size = swqe->wqe_size;
2872 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2873 		break;
2874 
2875 	case IB_WR_RDMA_WRITE_WITH_IMM:
2876 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
2877 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2878 
2879 		rwqe->wqe_size = 2;
2880 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
2881 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2882 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2883 						   wr, bad_wr);
2884 		rwqe->length = cpu_to_le32(length);
2885 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2886 		qp->prev_wqe_size = rwqe->wqe_size;
2887 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2888 		break;
2889 	case IB_WR_RDMA_WRITE:
2890 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
2891 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2892 
2893 		rwqe->wqe_size = 2;
2894 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2895 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2896 						   wr, bad_wr);
2897 		rwqe->length = cpu_to_le32(length);
2898 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2899 		qp->prev_wqe_size = rwqe->wqe_size;
2900 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2901 		break;
2902 	case IB_WR_RDMA_READ_WITH_INV:
2903 		DP_ERR(dev,
2904 		       "RDMA READ WITH INVALIDATE not supported\n");
2905 		*bad_wr = wr;
2906 		rc = -EINVAL;
2907 		break;
2908 
2909 	case IB_WR_RDMA_READ:
2910 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
2911 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2912 
2913 		rwqe->wqe_size = 2;
2914 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2915 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2916 						   wr, bad_wr);
2917 		rwqe->length = cpu_to_le32(length);
2918 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2919 		qp->prev_wqe_size = rwqe->wqe_size;
2920 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2921 		break;
2922 
2923 	case IB_WR_ATOMIC_CMP_AND_SWP:
2924 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2925 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
2926 		awqe1->wqe_size = 4;
2927 
2928 		awqe2 = qed_chain_produce(&qp->sq.pbl);
2929 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
2930 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
2931 
2932 		awqe3 = qed_chain_produce(&qp->sq.pbl);
2933 
2934 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2935 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
2936 			DMA_REGPAIR_LE(awqe3->swap_data,
2937 				       atomic_wr(wr)->compare_add);
2938 		} else {
2939 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
2940 			DMA_REGPAIR_LE(awqe3->swap_data,
2941 				       atomic_wr(wr)->swap);
2942 			DMA_REGPAIR_LE(awqe3->cmp_data,
2943 				       atomic_wr(wr)->compare_add);
2944 		}
2945 
2946 		qedr_prepare_sq_sges(qp, NULL, wr);
2947 
2948 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
2949 		qp->prev_wqe_size = awqe1->wqe_size;
2950 		break;
2951 
2952 	case IB_WR_LOCAL_INV:
2953 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
2954 		iwqe->wqe_size = 1;
2955 
2956 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
2957 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
2958 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
2959 		qp->prev_wqe_size = iwqe->wqe_size;
2960 		break;
2961 	case IB_WR_REG_MR:
2962 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
2963 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
2964 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
2965 		fwqe1->wqe_size = 2;
2966 
2967 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
2968 		if (rc) {
2969 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
2970 			*bad_wr = wr;
2971 			break;
2972 		}
2973 
2974 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
2975 		qp->prev_wqe_size = fwqe1->wqe_size;
2976 		break;
2977 	default:
2978 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
2979 		rc = -EINVAL;
2980 		*bad_wr = wr;
2981 		break;
2982 	}
2983 
2984 	if (*bad_wr) {
2985 		u16 value;
2986 
2987 		/* Restore prod to its position before
2988 		 * this WR was processed
2989 		 */
2990 		value = le16_to_cpu(qp->sq.db_data.data.value);
2991 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
2992 
2993 		/* Restore prev_wqe_size */
2994 		qp->prev_wqe_size = wqe->prev_wqe_size;
2995 		rc = -EINVAL;
2996 		DP_ERR(dev, "POST SEND FAILED\n");
2997 	}
2998 
2999 	return rc;
3000 }
3001 
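/* Post a list of send WRs. GSI QPs are handed to the GSI path; otherwise
 * each WR is built under q_lock and a single SQ doorbell is rung after the
 * whole list has been processed.
 */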
3002 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3003 		   struct ib_send_wr **bad_wr)
3004 {
3005 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3006 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3007 	unsigned long flags;
3008 	int rc = 0;
3009 
3010 	*bad_wr = NULL;
3011 
3012 	if (qp->qp_type == IB_QPT_GSI)
3013 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3014 
3015 	spin_lock_irqsave(&qp->q_lock, flags);
3016 
3017 	if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3018 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3019 	    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3020 		spin_unlock_irqrestore(&qp->q_lock, flags);
3021 		*bad_wr = wr;
3022 		DP_DEBUG(dev, QEDR_MSG_CQ,
3023 			 "QP in wrong state! QP icid=0x%x state %d\n",
3024 			 qp->icid, qp->state);
3025 		return -EINVAL;
3026 	}
3027 
3028 	while (wr) {
3029 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3030 		if (rc)
3031 			break;
3032 
3033 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3034 
3035 		qedr_inc_sw_prod(&qp->sq);
3036 
3037 		qp->sq.db_data.data.value++;
3038 
3039 		wr = wr->next;
3040 	}
3041 
3042 	/* Trigger the doorbell.
3043 	 * If the first WR failed, the doorbell is rung in vain. However,
3044 	 * this is not harmful (as long as the producer value is unchanged).
3045 	 * For performance reasons we avoid checking for this redundant
3046 	 * doorbell.
3047 	 */
3048 	wmb();
3049 	writel(qp->sq.db_data.raw, qp->sq.db);
3050 
3051 	/* Make sure write sticks */
3052 	mmiowb();
3053 
3054 	spin_unlock_irqrestore(&qp->q_lock, flags);
3055 
3056 	return rc;
3057 }
3058 
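/* Post a list of receive WRs. Each WR produces one RQ SGE per
 * scatter/gather entry (or a single zero-length SGE when none are given)
 * and the RQ doorbell is rung per WR.
 */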
3059 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3060 		   struct ib_recv_wr **bad_wr)
3061 {
3062 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3063 	struct qedr_dev *dev = qp->dev;
3064 	unsigned long flags;
3065 	int status = 0;
3066 
3067 	if (qp->qp_type == IB_QPT_GSI)
3068 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3069 
3070 	spin_lock_irqsave(&qp->q_lock, flags);
3071 
3072 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3073 		spin_unlock_irqrestore(&qp->q_lock, flags);
3074 		*bad_wr = wr;
3075 		return -EINVAL;
3076 	}
3077 
3078 	while (wr) {
3079 		int i;
3080 
3081 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3082 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3083 		    wr->num_sge > qp->rq.max_sges) {
3084 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3085 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3086 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3087 			       qp->rq.max_sges);
3088 			status = -ENOMEM;
3089 			*bad_wr = wr;
3090 			break;
3091 		}
3092 		for (i = 0; i < wr->num_sge; i++) {
3093 			u32 flags = 0;
3094 			struct rdma_rq_sge *rqe =
3095 			    qed_chain_produce(&qp->rq.pbl);
3096 
3097 			/* The first one must include the number
3098 			 * of SGEs in the list
3099 			 */
3100 			if (!i)
3101 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3102 					  wr->num_sge);
3103 
3104 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3105 				  wr->sg_list[i].lkey);
3106 
3107 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3108 				   wr->sg_list[i].length, flags);
3109 		}
3110 
3111 		/* Special case of no SGEs. The FW requires between 1 and 4 SGEs;
3112 		 * in this case we post one SGE with length zero, because an
3113 		 * RDMA write with immediate consumes an RQ entry.
3114 		 */
3115 		if (!wr->num_sge) {
3116 			u32 flags = 0;
3117 			struct rdma_rq_sge *rqe =
3118 			    qed_chain_produce(&qp->rq.pbl);
3119 
3120 			/* The first one must include the number
3121 			 * of SGEs in the list
3122 			 */
3123 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3124 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3125 
3126 			RQ_SGE_SET(rqe, 0, 0, flags);
3127 			i = 1;
3128 		}
3129 
3130 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3131 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3132 
3133 		qedr_inc_sw_prod(&qp->rq);
3134 
3135 		/* Flush all the writes before signalling doorbell */
3136 		wmb();
3137 
3138 		qp->rq.db_data.data.value++;
3139 
3140 		writel(qp->rq.db_data.raw, qp->rq.db);
3141 
3142 		/* Make sure write sticks */
3143 		mmiowb();
3144 
3145 		wr = wr->next;
3146 	}
3147 
3148 	spin_unlock_irqrestore(&qp->q_lock, flags);
3149 
3150 	return status;
3151 }
3152 
3153 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3154 {
3155 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3156 
3157 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3158 		cq->pbl_toggle;
3159 }
3160 
3161 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3162 {
3163 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3164 	struct qedr_qp *qp;
3165 
3166 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3167 						   resp_cqe->qp_handle.lo,
3168 						   u64);
3169 	return qp;
3170 }
3171 
3172 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3173 {
3174 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3175 
3176 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3177 }
3178 
3179 /* Return latest CQE (needs processing) */
3180 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3181 {
3182 	return cq->latest_cqe;
3183 }
3184 
3185 /* For FMR we need to increment the completed counter used by the FMR
3186  * algorithm to determine whether a PBL can be freed or not.
3187  * This must be done whether or not the work request was signaled. For
3188  * this purpose we call this function from the condition that checks if a
3189  * WR should be skipped, to make sure we don't miss it (this FMR operation
3190  * may not have been signaled).
3191  */
3192 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3193 {
3194 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3195 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3196 }
3197 
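/* Consume requester-side completions up to hw_cons, filling a work
 * completion for every signaled WQE (or for every WQE when force is set)
 * and returning the number of WCs written.
 */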
3198 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3199 		       struct qedr_cq *cq, int num_entries,
3200 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3201 		       int force)
3202 {
3203 	u16 cnt = 0;
3204 
3205 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3206 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3207 			qedr_chk_if_fmr(qp);
3208 			/* skip WC */
3209 			goto next_cqe;
3210 		}
3211 
3212 		/* fill WC */
3213 		wc->status = status;
3214 		wc->vendor_err = 0;
3215 		wc->wc_flags = 0;
3216 		wc->src_qp = qp->id;
3217 		wc->qp = &qp->ibqp;
3218 
3219 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3220 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3221 
3222 		switch (wc->opcode) {
3223 		case IB_WC_RDMA_WRITE:
3224 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3225 			break;
3226 		case IB_WC_COMP_SWAP:
3227 		case IB_WC_FETCH_ADD:
3228 			wc->byte_len = 8;
3229 			break;
3230 		case IB_WC_REG_MR:
3231 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3232 			break;
3233 		case IB_WC_RDMA_READ:
3234 		case IB_WC_SEND:
3235 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3236 			break;
3237 		default:
3238 			break;
3239 		}
3240 
3241 		num_entries--;
3242 		wc++;
3243 		cnt++;
3244 next_cqe:
3245 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3246 			qed_chain_consume(&qp->sq.pbl);
3247 		qedr_inc_sw_cons(&qp->sq);
3248 	}
3249 
3250 	return cnt;
3251 }
3252 
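/* Handle a requester CQE: on success or flush, complete the outstanding
 * WQEs accordingly; on any other error, complete the WQEs preceding the
 * failing one as successful and report the failure with the translated
 * work completion status.
 */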
3253 static int qedr_poll_cq_req(struct qedr_dev *dev,
3254 			    struct qedr_qp *qp, struct qedr_cq *cq,
3255 			    int num_entries, struct ib_wc *wc,
3256 			    struct rdma_cqe_requester *req)
3257 {
3258 	int cnt = 0;
3259 
3260 	switch (req->status) {
3261 	case RDMA_CQE_REQ_STS_OK:
3262 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3263 				  IB_WC_SUCCESS, 0);
3264 		break;
3265 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3266 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3267 			DP_ERR(dev,
3268 			       "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3269 			       cq->icid, qp->icid);
3270 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3271 				  IB_WC_WR_FLUSH_ERR, 1);
3272 		break;
3273 	default:
3274 		/* process all WQEs before the consumer */
3275 		qp->state = QED_ROCE_QP_STATE_ERR;
3276 		cnt = process_req(dev, qp, cq, num_entries, wc,
3277 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3278 		wc += cnt;
3279 		/* if we have extra WC fill it with actual error info */
3280 		if (cnt < num_entries) {
3281 			enum ib_wc_status wc_status;
3282 
3283 			switch (req->status) {
3284 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3285 				DP_ERR(dev,
3286 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3287 				       cq->icid, qp->icid);
3288 				wc_status = IB_WC_BAD_RESP_ERR;
3289 				break;
3290 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3291 				DP_ERR(dev,
3292 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3293 				       cq->icid, qp->icid);
3294 				wc_status = IB_WC_LOC_LEN_ERR;
3295 				break;
3296 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3297 				DP_ERR(dev,
3298 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3299 				       cq->icid, qp->icid);
3300 				wc_status = IB_WC_LOC_QP_OP_ERR;
3301 				break;
3302 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3303 				DP_ERR(dev,
3304 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3305 				       cq->icid, qp->icid);
3306 				wc_status = IB_WC_LOC_PROT_ERR;
3307 				break;
3308 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3309 				DP_ERR(dev,
3310 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3311 				       cq->icid, qp->icid);
3312 				wc_status = IB_WC_MW_BIND_ERR;
3313 				break;
3314 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3315 				DP_ERR(dev,
3316 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3317 				       cq->icid, qp->icid);
3318 				wc_status = IB_WC_REM_INV_REQ_ERR;
3319 				break;
3320 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3321 				DP_ERR(dev,
3322 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3323 				       cq->icid, qp->icid);
3324 				wc_status = IB_WC_REM_ACCESS_ERR;
3325 				break;
3326 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3327 				DP_ERR(dev,
3328 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3329 				       cq->icid, qp->icid);
3330 				wc_status = IB_WC_REM_OP_ERR;
3331 				break;
3332 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3333 				DP_ERR(dev,
3334 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3335 				       cq->icid, qp->icid);
3336 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3337 				break;
3338 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3339 				DP_ERR(dev,
3340 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3341 				       cq->icid, qp->icid);
3342 				wc_status = IB_WC_RETRY_EXC_ERR;
3343 				break;
3344 			default:
3345 				DP_ERR(dev,
3346 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3347 				       cq->icid, qp->icid);
3348 				wc_status = IB_WC_GENERAL_ERR;
3349 			}
3350 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3351 					   wc_status, 1);
3352 		}
3353 	}
3354 
3355 	return cnt;
3356 }
3357 
3358 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3359 {
3360 	switch (status) {
3361 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3362 		return IB_WC_LOC_ACCESS_ERR;
3363 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3364 		return IB_WC_LOC_LEN_ERR;
3365 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3366 		return IB_WC_LOC_QP_OP_ERR;
3367 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3368 		return IB_WC_LOC_PROT_ERR;
3369 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3370 		return IB_WC_MW_BIND_ERR;
3371 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3372 		return IB_WC_REM_INV_RD_REQ_ERR;
3373 	case RDMA_CQE_RESP_STS_OK:
3374 		return IB_WC_SUCCESS;
3375 	default:
3376 		return IB_WC_GENERAL_ERR;
3377 	}
3378 }
3379 
3380 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3381 					  struct ib_wc *wc)
3382 {
3383 	wc->status = IB_WC_SUCCESS;
3384 	wc->byte_len = le32_to_cpu(resp->length);
3385 
3386 	if (resp->flags & QEDR_RESP_IMM) {
3387 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3388 		wc->wc_flags |= IB_WC_WITH_IMM;
3389 
3390 		if (resp->flags & QEDR_RESP_RDMA)
3391 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3392 
3393 		if (resp->flags & QEDR_RESP_INV)
3394 			return -EINVAL;
3395 
3396 	} else if (resp->flags & QEDR_RESP_INV) {
3397 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3398 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3399 
3400 		if (resp->flags & QEDR_RESP_RDMA)
3401 			return -EINVAL;
3402 
3403 	} else if (resp->flags & QEDR_RESP_RDMA) {
3404 		return -EINVAL;
3405 	}
3406 
3407 	return 0;
3408 }
3409 
3410 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3411 			       struct qedr_cq *cq, struct ib_wc *wc,
3412 			       struct rdma_cqe_responder *resp, u64 wr_id)
3413 {
3414 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3415 	wc->opcode = IB_WC_RECV;
3416 	wc->wc_flags = 0;
3417 
3418 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3419 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3420 			DP_ERR(dev,
3421 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3422 			       cq, cq->icid, resp->flags);
3423 
3424 	} else {
3425 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3426 		if (wc->status == IB_WC_GENERAL_ERR)
3427 			DP_ERR(dev,
3428 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3429 			       cq, cq->icid, resp->status);
3430 	}
3431 
3432 	/* Fill the rest of the WC */
3433 	wc->vendor_err = 0;
3434 	wc->src_qp = qp->id;
3435 	wc->qp = &qp->ibqp;
3436 	wc->wr_id = wr_id;
3437 }
3438 
3439 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3440 			    struct qedr_cq *cq, struct ib_wc *wc,
3441 			    struct rdma_cqe_responder *resp)
3442 {
3443 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3444 
3445 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3446 
3447 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3448 		qed_chain_consume(&qp->rq.pbl);
3449 	qedr_inc_sw_cons(&qp->rq);
3450 
3451 	return 1;
3452 }
3453 
3454 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3455 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3456 {
3457 	u16 cnt = 0;
3458 
3459 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3460 		/* fill WC */
3461 		wc->status = IB_WC_WR_FLUSH_ERR;
3462 		wc->vendor_err = 0;
3463 		wc->wc_flags = 0;
3464 		wc->src_qp = qp->id;
3465 		wc->byte_len = 0;
3466 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3467 		wc->qp = &qp->ibqp;
3468 		num_entries--;
3469 		wc++;
3470 		cnt++;
3471 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3472 			qed_chain_consume(&qp->rq.pbl);
3473 		qedr_inc_sw_cons(&qp->rq);
3474 	}
3475 
3476 	return cnt;
3477 }
3478 
3479 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3480 				 struct rdma_cqe_responder *resp, int *update)
3481 {
3482 	if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
3483 		consume_cqe(cq);
3484 		*update |= 1;
3485 	}
3486 }
3487 
3488 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3489 			     struct qedr_cq *cq, int num_entries,
3490 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
3491 			     int *update)
3492 {
3493 	int cnt;
3494 
3495 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3496 		cnt = process_resp_flush(qp, cq, num_entries, wc,
3497 					 resp->rq_cons);
3498 		try_consume_resp_cqe(cq, qp, resp, update);
3499 	} else {
3500 		cnt = process_resp_one(dev, qp, cq, wc, resp);
3501 		consume_cqe(cq);
3502 		*update |= 1;
3503 	}
3504 
3505 	return cnt;
3506 }
3507 
3508 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3509 				struct rdma_cqe_requester *req, int *update)
3510 {
3511 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3512 		consume_cqe(cq);
3513 		*update |= 1;
3514 	}
3515 }
3516 
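/* Poll up to num_entries completions from the CQ, dispatching requester
 * and responder CQEs to their handlers, and update the CQ consumer
 * doorbell when CQEs were consumed.
 */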
3517 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3518 {
3519 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3520 	struct qedr_cq *cq = get_qedr_cq(ibcq);
3521 	union rdma_cqe *cqe = cq->latest_cqe;
3522 	u32 old_cons, new_cons;
3523 	unsigned long flags;
3524 	int update = 0;
3525 	int done = 0;
3526 
3527 	if (cq->destroyed) {
3528 		DP_ERR(dev,
3529 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3530 		       cq, cq->icid);
3531 		return 0;
3532 	}
3533 
3534 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3535 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3536 
3537 	spin_lock_irqsave(&cq->cq_lock, flags);
3538 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3539 	while (num_entries && is_valid_cqe(cq, cqe)) {
3540 		struct qedr_qp *qp;
3541 		int cnt = 0;
3542 
3543 		/* prevent speculative reads of any field of CQE */
3544 		rmb();
3545 
3546 		qp = cqe_get_qp(cqe);
3547 		if (!qp) {
3548 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3549 			break;
3550 		}
3551 
3552 		wc->qp = &qp->ibqp;
3553 
3554 		switch (cqe_get_type(cqe)) {
3555 		case RDMA_CQE_TYPE_REQUESTER:
3556 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3557 					       &cqe->req);
3558 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
3559 			break;
3560 		case RDMA_CQE_TYPE_RESPONDER_RQ:
3561 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3562 						&cqe->resp, &update);
3563 			break;
3564 		case RDMA_CQE_TYPE_INVALID:
3565 		default:
3566 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
3567 			       cqe_get_type(cqe));
3568 		}
3569 		num_entries -= cnt;
3570 		wc += cnt;
3571 		done += cnt;
3572 
3573 		cqe = get_cqe(cq);
3574 	}
3575 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3576 
3577 	cq->cq_cons += new_cons - old_cons;
3578 
3579 	if (update)
3580 		/* The doorbell notifies about the latest VALID entry,
3581 		 * but the chain already points to the next INVALID one.
3582 		 */
3583 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3584 
3585 	spin_unlock_irqrestore(&cq->cq_lock, flags);
3586 	return done;
3587 }
3588 
3589 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3590 		     u8 port_num,
3591 		     const struct ib_wc *in_wc,
3592 		     const struct ib_grh *in_grh,
3593 		     const struct ib_mad_hdr *mad_hdr,
3594 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
3595 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
3596 {
3597 	struct qedr_dev *dev = get_qedr_dev(ibdev);
3598 
3599 	DP_DEBUG(dev, QEDR_MSG_GSI,
3600 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3601 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3602 		 mad_hdr->class_specific, mad_hdr->class_version,
3603 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3604 	return IB_MAD_RESULT_SUCCESS;
3605 }
3606 
3607 int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
3608 			struct ib_port_immutable *immutable)
3609 {
3610 	struct ib_port_attr attr;
3611 	int err;
3612 
3613 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
3614 				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
3615 
3616 	err = ib_query_port(ibdev, port_num, &attr);
3617 	if (err)
3618 		return err;
3619 
3620 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
3621 	immutable->gid_tbl_len = attr.gid_tbl_len;
3622 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
3623 
3624 	return 0;
3625 }
3626