xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision f7d84fa7)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_cm.h"
53 
54 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 
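/* The RoCE P_Key table holds only the default P_Key, so any valid index
 * simply reports QEDR_ROCE_PKEY_DEFAULT.
 */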
56 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
57 {
58 	if (index > QEDR_ROCE_PKEY_TABLE_LEN)
59 		return -EINVAL;
60 
61 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
62 	return 0;
63 }
64 
65 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
66 		   union ib_gid *sgid)
67 {
68 	struct qedr_dev *dev = get_qedr_dev(ibdev);
69 	int rc = 0;
70 
71 	if (!rdma_cap_roce_gid_table(ibdev, port))
72 		return -ENODEV;
73 
74 	rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
75 	if (rc == -EAGAIN) {
76 		memcpy(sgid, &zgid, sizeof(*sgid));
77 		return 0;
78 	}
79 
80 	DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
81 		 sgid->global.interface_id, sgid->global.subnet_prefix);
82 
83 	return rc;
84 }
85 
86 int qedr_add_gid(struct ib_device *device, u8 port_num,
87 		 unsigned int index, const union ib_gid *gid,
88 		 const struct ib_gid_attr *attr, void **context)
89 {
90 	if (!rdma_cap_roce_gid_table(device, port_num))
91 		return -EINVAL;
92 
93 	if (port_num > QEDR_MAX_PORT)
94 		return -EINVAL;
95 
96 	if (!context)
97 		return -EINVAL;
98 
99 	return 0;
100 }
101 
102 int qedr_del_gid(struct ib_device *device, u8 port_num,
103 		 unsigned int index, void **context)
104 {
105 	if (!rdma_cap_roce_gid_table(device, port_num))
106 		return -EINVAL;
107 
108 	if (port_num > QEDR_MAX_PORT)
109 		return -EINVAL;
110 
111 	if (!context)
112 		return -EINVAL;
113 
114 	return 0;
115 }
116 
117 int qedr_query_device(struct ib_device *ibdev,
118 		      struct ib_device_attr *attr, struct ib_udata *udata)
119 {
120 	struct qedr_dev *dev = get_qedr_dev(ibdev);
121 	struct qedr_device_attr *qattr = &dev->attr;
122 
123 	if (!dev->rdma_ctx) {
124 		DP_ERR(dev,
125 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
126 		       dev->rdma_ctx);
127 		return -EINVAL;
128 	}
129 
130 	memset(attr, 0, sizeof(*attr));
131 
132 	attr->fw_ver = qattr->fw_ver;
133 	attr->sys_image_guid = qattr->sys_image_guid;
134 	attr->max_mr_size = qattr->max_mr_size;
135 	attr->page_size_cap = qattr->page_size_caps;
136 	attr->vendor_id = qattr->vendor_id;
137 	attr->vendor_part_id = qattr->vendor_part_id;
138 	attr->hw_ver = qattr->hw_ver;
139 	attr->max_qp = qattr->max_qp;
140 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
141 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
142 	    IB_DEVICE_RC_RNR_NAK_GEN |
143 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
144 
145 	attr->max_sge = qattr->max_sge;
146 	attr->max_sge_rd = qattr->max_sge;
147 	attr->max_cq = qattr->max_cq;
148 	attr->max_cqe = qattr->max_cqe;
149 	attr->max_mr = qattr->max_mr;
150 	attr->max_mw = qattr->max_mw;
151 	attr->max_pd = qattr->max_pd;
152 	attr->atomic_cap = dev->atomic_cap;
153 	attr->max_fmr = qattr->max_fmr;
154 	attr->max_map_per_fmr = 16;
155 	attr->max_qp_init_rd_atom =
156 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
157 	attr->max_qp_rd_atom =
158 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
159 		attr->max_qp_init_rd_atom);
160 
161 	attr->max_srq = qattr->max_srq;
162 	attr->max_srq_sge = qattr->max_srq_sge;
163 	attr->max_srq_wr = qattr->max_srq_wr;
164 
165 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
166 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
167 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
168 	attr->max_ah = qattr->max_ah;
169 
170 	return 0;
171 }
172 
173 #define QEDR_SPEED_SDR		(1)
174 #define QEDR_SPEED_DDR		(2)
175 #define QEDR_SPEED_QDR		(4)
176 #define QEDR_SPEED_FDR10	(8)
177 #define QEDR_SPEED_FDR		(16)
178 #define QEDR_SPEED_EDR		(32)
179 
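/* Translate an Ethernet link speed (in Mbps) into the nearest equivalent
 * IB speed/width pair; unknown speeds fall back to SDR x1.
 */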
180 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
181 					    u8 *ib_width)
182 {
183 	switch (speed) {
184 	case 1000:
185 		*ib_speed = QEDR_SPEED_SDR;
186 		*ib_width = IB_WIDTH_1X;
187 		break;
188 	case 10000:
189 		*ib_speed = QEDR_SPEED_QDR;
190 		*ib_width = IB_WIDTH_1X;
191 		break;
192 
193 	case 20000:
194 		*ib_speed = QEDR_SPEED_DDR;
195 		*ib_width = IB_WIDTH_4X;
196 		break;
197 
198 	case 25000:
199 		*ib_speed = QEDR_SPEED_EDR;
200 		*ib_width = IB_WIDTH_1X;
201 		break;
202 
203 	case 40000:
204 		*ib_speed = QEDR_SPEED_QDR;
205 		*ib_width = IB_WIDTH_4X;
206 		break;
207 
208 	case 50000:
209 		*ib_speed = QEDR_SPEED_QDR;
210 		*ib_width = IB_WIDTH_4X;
211 		break;
212 
213 	case 100000:
214 		*ib_speed = QEDR_SPEED_EDR;
215 		*ib_width = IB_WIDTH_4X;
216 		break;
217 
218 	default:
219 		/* Unsupported */
220 		*ib_speed = QEDR_SPEED_SDR;
221 		*ib_width = IB_WIDTH_1X;
222 	}
223 }
224 
225 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
226 {
227 	struct qedr_dev *dev;
228 	struct qed_rdma_port *rdma_port;
229 
230 	dev = get_qedr_dev(ibdev);
231 	if (port > 1) {
232 		DP_ERR(dev, "invalid_port=0x%x\n", port);
233 		return -EINVAL;
234 	}
235 
236 	if (!dev->rdma_ctx) {
237 		DP_ERR(dev, "rdma_ctx is NULL\n");
238 		return -EINVAL;
239 	}
240 
241 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
242 
243 	/* *attr is zeroed by the caller; avoid zeroing it here */
244 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
245 		attr->state = IB_PORT_ACTIVE;
246 		attr->phys_state = 5;
247 	} else {
248 		attr->state = IB_PORT_DOWN;
249 		attr->phys_state = 3;
250 	}
251 	attr->max_mtu = IB_MTU_4096;
252 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
253 	attr->lid = 0;
254 	attr->lmc = 0;
255 	attr->sm_lid = 0;
256 	attr->sm_sl = 0;
257 	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
258 	attr->gid_tbl_len = QEDR_MAX_SGID;
259 	attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
260 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
261 	attr->qkey_viol_cntr = 0;
262 	get_link_speed_and_width(rdma_port->link_speed,
263 				 &attr->active_speed, &attr->active_width);
264 	attr->max_msg_sz = rdma_port->max_msg_size;
265 	attr->max_vl_num = 4;
266 
267 	return 0;
268 }
269 
270 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
271 		     struct ib_port_modify *props)
272 {
273 	struct qedr_dev *dev;
274 
275 	dev = get_qedr_dev(ibdev);
276 	if (port > 1) {
277 		DP_ERR(dev, "invalid_port=0x%x\n", port);
278 		return -EINVAL;
279 	}
280 
281 	return 0;
282 }
283 
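/* Record a physical address range that this user context may later mmap()
 * (the DPI doorbell window registered in qedr_alloc_ucontext); qedr_mmap()
 * validates requests against this list.
 */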
284 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
285 			 unsigned long len)
286 {
287 	struct qedr_mm *mm;
288 
289 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
290 	if (!mm)
291 		return -ENOMEM;
292 
293 	mm->key.phy_addr = phy_addr;
294 	/* This function might be called with a length which is not a multiple
295 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
296 	 * forces this granularity by increasing the requested size if needed.
297 	 * When qedr_mmap is called, it will search the list with the updated
298 	 * length as a key. To prevent search failures, the length is rounded up
299 	 * in advance to PAGE_SIZE.
300 	 */
301 	mm->key.len = roundup(len, PAGE_SIZE);
302 	INIT_LIST_HEAD(&mm->entry);
303 
304 	mutex_lock(&uctx->mm_list_lock);
305 	list_add(&mm->entry, &uctx->mm_head);
306 	mutex_unlock(&uctx->mm_list_lock);
307 
308 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
309 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
310 		 (unsigned long long)mm->key.phy_addr,
311 		 (unsigned long)mm->key.len, uctx);
312 
313 	return 0;
314 }
315 
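/* Check whether (phy_addr, len) was previously registered via
 * qedr_add_mmap().
 */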
316 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
317 			     unsigned long len)
318 {
319 	bool found = false;
320 	struct qedr_mm *mm;
321 
322 	mutex_lock(&uctx->mm_list_lock);
323 	list_for_each_entry(mm, &uctx->mm_head, entry) {
324 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
325 			continue;
326 
327 		found = true;
328 		break;
329 	}
330 	mutex_unlock(&uctx->mm_list_lock);
331 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
332 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
333 		 phy_addr, len, uctx, found);
334 
335 	return found;
336 }
337 
338 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
339 					struct ib_udata *udata)
340 {
341 	int rc;
342 	struct qedr_ucontext *ctx;
343 	struct qedr_alloc_ucontext_resp uresp;
344 	struct qedr_dev *dev = get_qedr_dev(ibdev);
345 	struct qed_rdma_add_user_out_params oparams;
346 
347 	if (!udata)
348 		return ERR_PTR(-EFAULT);
349 
350 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
351 	if (!ctx)
352 		return ERR_PTR(-ENOMEM);
353 
354 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
355 	if (rc) {
356 		DP_ERR(dev,
357 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size or closing unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
358 		       rc);
359 		goto err;
360 	}
361 
362 	ctx->dpi = oparams.dpi;
363 	ctx->dpi_addr = oparams.dpi_addr;
364 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
365 	ctx->dpi_size = oparams.dpi_size;
366 	INIT_LIST_HEAD(&ctx->mm_head);
367 	mutex_init(&ctx->mm_list_lock);
368 
369 	memset(&uresp, 0, sizeof(uresp));
370 
371 	uresp.db_pa = ctx->dpi_phys_addr;
372 	uresp.db_size = ctx->dpi_size;
373 	uresp.max_send_wr = dev->attr.max_sqe;
374 	uresp.max_recv_wr = dev->attr.max_rqe;
375 	uresp.max_srq_wr = dev->attr.max_srq_wr;
376 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
377 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
378 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
379 	uresp.max_cqes = QEDR_MAX_CQES;
380 
381 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
382 	if (rc)
383 		goto err;
384 
385 	ctx->dev = dev;
386 
387 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
388 	if (rc)
389 		goto err;
390 
391 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
392 		 &ctx->ibucontext);
393 	return &ctx->ibucontext;
394 
395 err:
396 	kfree(ctx);
397 	return ERR_PTR(rc);
398 }
399 
400 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
401 {
402 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
403 	struct qedr_mm *mm, *tmp;
404 	int status = 0;
405 
406 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
407 		 uctx);
408 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
409 
410 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
411 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
412 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
413 			 mm->key.phy_addr, mm->key.len, uctx);
414 		list_del(&mm->entry);
415 		kfree(mm);
416 	}
417 
418 	kfree(uctx);
419 	return status;
420 }
421 
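/* Map driver memory into user space. Only ranges previously registered for
 * this context are allowed: the doorbell window is mapped write-combined and
 * must not be mapped for read, while other registered ranges (queue chains)
 * are remapped with the caller's page protection.
 */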
422 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
423 {
424 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
425 	struct qedr_dev *dev = get_qedr_dev(context->device);
426 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
427 	u64 unmapped_db = dev->db_phys_addr;
428 	unsigned long len = (vma->vm_end - vma->vm_start);
429 	int rc = 0;
430 	bool found;
431 
432 	DP_DEBUG(dev, QEDR_MSG_INIT,
433 		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
434 		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
435 	if (vma->vm_start & (PAGE_SIZE - 1)) {
436 		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
437 		       vma->vm_start);
438 		return -EINVAL;
439 	}
440 
441 	found = qedr_search_mmap(ucontext, vm_page, len);
442 	if (!found) {
443 		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
444 		       vma->vm_pgoff);
445 		return -EINVAL;
446 	}
447 
448 	DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
449 
450 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
451 						     dev->db_size))) {
452 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
453 		if (vma->vm_flags & VM_READ) {
454 			DP_ERR(dev, "Trying to map doorbell bar for read\n");
455 			return -EPERM;
456 		}
457 
458 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
459 
460 		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
461 					PAGE_SIZE, vma->vm_page_prot);
462 	} else {
463 		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
464 		rc = remap_pfn_range(vma, vma->vm_start,
465 				     vma->vm_pgoff, len, vma->vm_page_prot);
466 	}
467 	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
468 	return rc;
469 }
470 
471 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
472 			    struct ib_ucontext *context, struct ib_udata *udata)
473 {
474 	struct qedr_dev *dev = get_qedr_dev(ibdev);
475 	struct qedr_pd *pd;
476 	u16 pd_id;
477 	int rc;
478 
479 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
480 		 (udata && context) ? "User Lib" : "Kernel");
481 
482 	if (!dev->rdma_ctx) {
483 		DP_ERR(dev, "invalid RDMA context\n");
484 		return ERR_PTR(-EINVAL);
485 	}
486 
487 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
488 	if (!pd)
489 		return ERR_PTR(-ENOMEM);
490 
491 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
492 	if (rc)
493 		goto err;
494 
495 	pd->pd_id = pd_id;
496 
497 	if (udata && context) {
498 		struct qedr_alloc_pd_uresp uresp;
499 
500 		uresp.pd_id = pd_id;
501 
502 		rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
503 		if (rc) {
504 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
505 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
506 			goto err;
507 		}
508 
509 		pd->uctx = get_qedr_ucontext(context);
510 		pd->uctx->pd = pd;
511 	}
512 
513 	return &pd->ibpd;
514 
515 err:
516 	kfree(pd);
517 	return ERR_PTR(rc);
518 }
519 
520 int qedr_dealloc_pd(struct ib_pd *ibpd)
521 {
522 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
523 	struct qedr_pd *pd = get_qedr_pd(ibpd);
524 
525 	if (!pd) {
526 		pr_err("Invalid PD received in dealloc_pd\n");
527 		return -EINVAL;
528 	}
529 
530 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
531 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
532 
533 	kfree(pd);
534 
535 	return 0;
536 }
537 
538 static void qedr_free_pbl(struct qedr_dev *dev,
539 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
540 {
541 	struct pci_dev *pdev = dev->pdev;
542 	int i;
543 
544 	for (i = 0; i < pbl_info->num_pbls; i++) {
545 		if (!pbl[i].va)
546 			continue;
547 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
548 				  pbl[i].va, pbl[i].pa);
549 	}
550 
551 	kfree(pbl);
552 }
553 
554 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
555 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
556 
557 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
558 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
559 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
560 
561 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
562 					   struct qedr_pbl_info *pbl_info,
563 					   gfp_t flags)
564 {
565 	struct pci_dev *pdev = dev->pdev;
566 	struct qedr_pbl *pbl_table;
567 	dma_addr_t *pbl_main_tbl;
568 	dma_addr_t pa;
569 	void *va;
570 	int i;
571 
572 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
573 	if (!pbl_table)
574 		return ERR_PTR(-ENOMEM);
575 
576 	for (i = 0; i < pbl_info->num_pbls; i++) {
577 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
578 					&pa, flags);
579 		if (!va)
580 			goto err;
581 
582 		memset(va, 0, pbl_info->pbl_size);
583 		pbl_table[i].va = va;
584 		pbl_table[i].pa = pa;
585 	}
586 
587 	/* Two-layer PBLs: if we have more than one PBL, initialize the first
588 	 * one with physical pointers to all of the rest
589 	 */
590 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
591 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
592 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
593 
594 	return pbl_table;
595 
596 err:
597 	for (i--; i >= 0; i--)
598 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
599 				  pbl_table[i].va, pbl_table[i].pa);
600 
601 	qedr_free_pbl(dev, pbl_info, pbl_table);
602 
603 	return ERR_PTR(-ENOMEM);
604 }
605 
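/* Work out the PBL geometry for num_pbes page entries: a single page-sized
 * PBL when everything fits, or a two-layer layout where the first PBL holds
 * pointers to the data PBLs.
 */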
606 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
607 				struct qedr_pbl_info *pbl_info,
608 				u32 num_pbes, int two_layer_capable)
609 {
610 	u32 pbl_capacity;
611 	u32 pbl_size;
612 	u32 num_pbls;
613 
614 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
615 		if (num_pbes > MAX_PBES_TWO_LAYER) {
616 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
617 			       num_pbes);
618 			return -EINVAL;
619 		}
620 
621 		/* calculate required pbl page size */
622 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
623 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
624 			       NUM_PBES_ON_PAGE(pbl_size);
625 
626 		while (pbl_capacity < num_pbes) {
627 			pbl_size *= 2;
628 			pbl_capacity = pbl_size / sizeof(u64);
629 			pbl_capacity = pbl_capacity * pbl_capacity;
630 		}
631 
632 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
633 		num_pbls++;	/* One for layer 0 (points to the PBLs) */
634 		pbl_info->two_layered = true;
635 	} else {
636 		/* One layered PBL */
637 		num_pbls = 1;
638 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
639 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
640 		pbl_info->two_layered = false;
641 	}
642 
643 	pbl_info->num_pbls = num_pbls;
644 	pbl_info->pbl_size = pbl_size;
645 	pbl_info->num_pbes = num_pbes;
646 
647 	DP_DEBUG(dev, QEDR_MSG_MR,
648 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
649 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
650 
651 	return 0;
652 }
653 
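/* Walk the umem scatterlist and write the address of every firmware-sized
 * page (pg_shift granularity) into the PBEs, advancing to the next PBL page
 * whenever the current one fills up.
 */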
654 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
655 			       struct qedr_pbl *pbl,
656 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
657 {
658 	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
659 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
660 	struct qedr_pbl *pbl_tbl;
661 	struct scatterlist *sg;
662 	struct regpair *pbe;
663 	u64 pg_addr;
664 	int entry;
665 
666 	if (!pbl_info->num_pbes)
667 		return;
668 
669 	/* If we have a two-layered PBL, the first PBL points to the rest
670 	 * of the PBLs and the first entry lies on the second PBL in the table
671 	 */
672 	if (pbl_info->two_layered)
673 		pbl_tbl = &pbl[1];
674 	else
675 		pbl_tbl = pbl;
676 
677 	pbe = (struct regpair *)pbl_tbl->va;
678 	if (!pbe) {
679 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
680 		return;
681 	}
682 
683 	pbe_cnt = 0;
684 
685 	shift = umem->page_shift;
686 
687 	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
688 
689 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
690 		pages = sg_dma_len(sg) >> shift;
691 		pg_addr = sg_dma_address(sg);
692 		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
693 			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
694 				pbe->lo = cpu_to_le32(pg_addr);
695 				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
696 
697 				pg_addr += BIT(pg_shift);
698 				pbe_cnt++;
699 				total_num_pbes++;
700 				pbe++;
701 
702 				if (total_num_pbes == pbl_info->num_pbes)
703 					return;
704 
705 				/* If the given pbl is full storing the pbes,
706 				 * move to next pbl.
707 				 */
708 				if (pbe_cnt ==
709 				    (pbl_info->pbl_size / sizeof(u64))) {
710 					pbl_tbl++;
711 					pbe = (struct regpair *)pbl_tbl->va;
712 					pbe_cnt = 0;
713 				}
714 
715 				fw_pg_cnt++;
716 			}
717 		}
718 	}
719 }
720 
721 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
722 			      struct qedr_cq *cq, struct ib_udata *udata)
723 {
724 	struct qedr_create_cq_uresp uresp;
725 	int rc;
726 
727 	memset(&uresp, 0, sizeof(uresp));
728 
729 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
730 	uresp.icid = cq->icid;
731 
732 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
733 	if (rc)
734 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
735 
736 	return rc;
737 }
738 
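/* Advance the CQ to the next CQE; flip the ownership toggle bit when the
 * last element is reached and the chain wraps around.
 */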
739 static void consume_cqe(struct qedr_cq *cq)
740 {
741 	if (cq->latest_cqe == cq->toggle_cqe)
742 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
743 
744 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
745 }
746 
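/* Round the requested CQE count up so the CQ buffer (including one hidden
 * extra entry) occupies whole pages.
 */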
747 static inline int qedr_align_cq_entries(int entries)
748 {
749 	u64 size, aligned_size;
750 
751 	/* We allocate an extra entry that we don't report to the FW. */
752 	size = (entries + 1) * QEDR_CQE_SIZE;
753 	aligned_size = ALIGN(size, PAGE_SIZE);
754 
755 	return aligned_size / QEDR_CQE_SIZE;
756 }
757 
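/* Pin a user-space queue buffer with ib_umem_get() and build the PBL that
 * lets the firmware address it in FW_PAGE_SHIFT-sized pages.
 */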
758 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
759 				       struct qedr_dev *dev,
760 				       struct qedr_userq *q,
761 				       u64 buf_addr, size_t buf_len,
762 				       int access, int dmasync)
763 {
764 	u32 fw_pages;
765 	int rc;
766 
767 	q->buf_addr = buf_addr;
768 	q->buf_len = buf_len;
769 	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
770 	if (IS_ERR(q->umem)) {
771 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
772 		       PTR_ERR(q->umem));
773 		return PTR_ERR(q->umem);
774 	}
775 
776 	fw_pages = ib_umem_page_count(q->umem) <<
777 	    (q->umem->page_shift - FW_PAGE_SHIFT);
778 
779 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
780 	if (rc)
781 		goto err0;
782 
783 	q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
784 	if (IS_ERR(q->pbl_tbl)) {
785 		rc = PTR_ERR(q->pbl_tbl);
786 		goto err0;
787 	}
788 
789 	qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
790 			   FW_PAGE_SHIFT);
791 
792 	return 0;
793 
794 err0:
795 	ib_umem_release(q->umem);
796 
797 	return rc;
798 }
799 
800 static inline void qedr_init_cq_params(struct qedr_cq *cq,
801 				       struct qedr_ucontext *ctx,
802 				       struct qedr_dev *dev, int vector,
803 				       int chain_entries, int page_cnt,
804 				       u64 pbl_ptr,
805 				       struct qed_rdma_create_cq_in_params
806 				       *params)
807 {
808 	memset(params, 0, sizeof(*params));
809 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
810 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
811 	params->cnq_id = vector;
812 	params->cq_size = chain_entries - 1;
813 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
814 	params->pbl_num_pages = page_cnt;
815 	params->pbl_ptr = pbl_ptr;
816 	params->pbl_two_level = 0;
817 }
818 
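/* Ring the CQ doorbell with the given consumer index and arm flags. The
 * barriers order prior memory updates before the doorbell write and make
 * sure the MMIO write is posted.
 */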
819 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
820 {
821 	/* Flush data before signalling doorbell */
822 	wmb();
823 	cq->db.data.agg_flags = flags;
824 	cq->db.data.value = cpu_to_le32(cons);
825 	writeq(cq->db.raw, cq->db_addr);
826 
827 	/* Make sure write would stick */
828 	mmiowb();
829 }
830 
831 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
832 {
833 	struct qedr_cq *cq = get_qedr_cq(ibcq);
834 	unsigned long sflags;
835 	struct qedr_dev *dev;
836 
837 	dev = get_qedr_dev(ibcq->device);
838 
839 	if (cq->destroyed) {
840 		DP_ERR(dev,
841 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
842 		       cq, cq->icid);
843 		return -EINVAL;
844 	}
845 
846 
847 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
848 		return 0;
849 
850 	spin_lock_irqsave(&cq->cq_lock, sflags);
851 
852 	cq->arm_flags = 0;
853 
854 	if (flags & IB_CQ_SOLICITED)
855 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
856 
857 	if (flags & IB_CQ_NEXT_COMP)
858 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
859 
860 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
861 
862 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
863 
864 	return 0;
865 }
866 
867 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
868 			     const struct ib_cq_init_attr *attr,
869 			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
870 {
871 	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
872 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
873 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
874 	struct qedr_dev *dev = get_qedr_dev(ibdev);
875 	struct qed_rdma_create_cq_in_params params;
876 	struct qedr_create_cq_ureq ureq;
877 	int vector = attr->comp_vector;
878 	int entries = attr->cqe;
879 	struct qedr_cq *cq;
880 	int chain_entries;
881 	int page_cnt;
882 	u64 pbl_ptr;
883 	u16 icid;
884 	int rc;
885 
886 	DP_DEBUG(dev, QEDR_MSG_INIT,
887 		 "create_cq: called from %s. entries=%d, vector=%d\n",
888 		 udata ? "User Lib" : "Kernel", entries, vector);
889 
890 	if (entries > QEDR_MAX_CQES) {
891 		DP_ERR(dev,
892 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
893 		       entries, QEDR_MAX_CQES);
894 		return ERR_PTR(-EINVAL);
895 	}
896 
897 	chain_entries = qedr_align_cq_entries(entries);
898 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
899 
900 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
901 	if (!cq)
902 		return ERR_PTR(-ENOMEM);
903 
904 	if (udata) {
905 		memset(&ureq, 0, sizeof(ureq));
906 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
907 			DP_ERR(dev,
908 			       "create cq: problem copying data from user space\n");
909 			goto err0;
910 		}
911 
912 		if (!ureq.len) {
913 			DP_ERR(dev,
914 			       "create cq: cannot create a cq with 0 entries\n");
915 			goto err0;
916 		}
917 
918 		cq->cq_type = QEDR_CQ_TYPE_USER;
919 
920 		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
921 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
922 		if (rc)
923 			goto err0;
924 
925 		pbl_ptr = cq->q.pbl_tbl->pa;
926 		page_cnt = cq->q.pbl_info.num_pbes;
927 
928 		cq->ibcq.cqe = chain_entries;
929 	} else {
930 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
931 
932 		rc = dev->ops->common->chain_alloc(dev->cdev,
933 						   QED_CHAIN_USE_TO_CONSUME,
934 						   QED_CHAIN_MODE_PBL,
935 						   QED_CHAIN_CNT_TYPE_U32,
936 						   chain_entries,
937 						   sizeof(union rdma_cqe),
938 						   &cq->pbl);
939 		if (rc)
940 			goto err1;
941 
942 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
943 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
944 		cq->ibcq.cqe = cq->pbl.capacity;
945 	}
946 
947 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
948 			    pbl_ptr, &params);
949 
950 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
951 	if (rc)
952 		goto err2;
953 
954 	cq->icid = icid;
955 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
956 	spin_lock_init(&cq->cq_lock);
957 
958 	if (ib_ctx) {
959 		rc = qedr_copy_cq_uresp(dev, cq, udata);
960 		if (rc)
961 			goto err3;
962 	} else {
963 		/* Generate doorbell address. */
964 		cq->db_addr = dev->db_addr +
965 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
966 		cq->db.data.icid = cq->icid;
967 		cq->db.data.params = DB_AGG_CMD_SET <<
968 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
969 
970 		/* point to the very last element, passing it we will toggle */
971 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
972 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
973 		cq->latest_cqe = NULL;
974 		consume_cqe(cq);
975 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
976 	}
977 
978 	DP_DEBUG(dev, QEDR_MSG_CQ,
979 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
980 		 cq->icid, cq, params.cq_size);
981 
982 	return &cq->ibcq;
983 
984 err3:
985 	destroy_iparams.icid = cq->icid;
986 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
987 				  &destroy_oparams);
988 err2:
989 	if (udata)
990 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
991 	else
992 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
993 err1:
994 	if (udata)
995 		ib_umem_release(cq->q.umem);
996 err0:
997 	kfree(cq);
998 	return ERR_PTR(-EINVAL);
999 }
1000 
1001 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1002 {
1003 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1004 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1005 
1006 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1007 
1008 	return 0;
1009 }
1010 
1011 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1012 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1013 
1014 int qedr_destroy_cq(struct ib_cq *ibcq)
1015 {
1016 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1017 	struct qed_rdma_destroy_cq_out_params oparams;
1018 	struct qed_rdma_destroy_cq_in_params iparams;
1019 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1020 	int iter;
1021 	int rc;
1022 
1023 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1024 
1025 	cq->destroyed = 1;
1026 
1027 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
1028 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1029 		goto done;
1030 
1031 	iparams.icid = cq->icid;
1032 	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1033 	if (rc)
1034 		return rc;
1035 
1036 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1037 
1038 	if (ibcq->uobject && ibcq->uobject->context) {
1039 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1040 		ib_umem_release(cq->q.umem);
1041 	}
1042 
1043 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1044 	 * wait until all CNQ interrupts, if any, are received. This will always
1045 	 * happen and will always happen very fast. If not, then a serious error
1046 	 * has occurred. That is why we can use a long delay.
1047 	 * We spin for a short time so we don't lose time on context switching
1048 	 * in case all the completions are handled in that span. Otherwise
1049 	 * we sleep for a while and check again. Since the CNQ may be
1050 	 * associated with (only) the current CPU we use msleep to allow the
1051 	 * current CPU to be freed.
1052 	 * The CNQ notification is increased in qedr_irq_handler().
1053 	 */
1054 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1055 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1056 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1057 		iter--;
1058 	}
1059 
1060 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1061 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1062 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1063 		iter--;
1064 	}
1065 
1066 	if (oparams.num_cq_notif != cq->cnq_notif)
1067 		goto err;
1068 
1069 	/* Note that we don't need to have explicit code to wait for the
1070 	 * completion of the event handler because it is invoked from the EQ.
1071 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1072 	 * be certain that there's no event handler in process.
1073 	 */
1074 done:
1075 	cq->sig = ~cq->sig;
1076 
1077 	kfree(cq);
1078 
1079 	return 0;
1080 
1081 err:
1082 	DP_ERR(dev,
1083 	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1084 	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1085 
1086 	return -EINVAL;
1087 }
1088 
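/* Resolve the source GID referenced by the AH attribute and fill in the
 * modify-QP parameters accordingly: SGID/DGID, RoCE mode (v1, v2/IPv4 or
 * v2/IPv6) and VLAN id.
 */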
1089 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1090 					  struct ib_qp_attr *attr,
1091 					  int attr_mask,
1092 					  struct qed_rdma_modify_qp_in_params
1093 					  *qp_params)
1094 {
1095 	enum rdma_network_type nw_type;
1096 	struct ib_gid_attr gid_attr;
1097 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1098 	union ib_gid gid;
1099 	u32 ipv4_addr;
1100 	int rc = 0;
1101 	int i;
1102 
1103 	rc = ib_get_cached_gid(ibqp->device,
1104 			       rdma_ah_get_port_num(&attr->ah_attr),
1105 			       grh->sgid_index, &gid, &gid_attr);
1106 	if (rc)
1107 		return rc;
1108 
1109 	if (!memcmp(&gid, &zgid, sizeof(gid)))
1110 		return -ENOENT;
1111 
1112 	if (gid_attr.ndev) {
1113 		qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1114 
1115 		dev_put(gid_attr.ndev);
1116 		nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1117 		switch (nw_type) {
1118 		case RDMA_NETWORK_IPV6:
1119 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1120 			       sizeof(qp_params->sgid));
1121 			memcpy(&qp_params->dgid.bytes[0],
1122 			       &grh->dgid,
1123 			       sizeof(qp_params->dgid));
1124 			qp_params->roce_mode = ROCE_V2_IPV6;
1125 			SET_FIELD(qp_params->modify_flags,
1126 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1127 			break;
1128 		case RDMA_NETWORK_IB:
1129 			memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1130 			       sizeof(qp_params->sgid));
1131 			memcpy(&qp_params->dgid.bytes[0],
1132 			       &grh->dgid,
1133 			       sizeof(qp_params->dgid));
1134 			qp_params->roce_mode = ROCE_V1;
1135 			break;
1136 		case RDMA_NETWORK_IPV4:
1137 			memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1138 			memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1139 			ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1140 			qp_params->sgid.ipv4_addr = ipv4_addr;
1141 			ipv4_addr =
1142 			    qedr_get_ipv4_from_gid(grh->dgid.raw);
1143 			qp_params->dgid.ipv4_addr = ipv4_addr;
1144 			SET_FIELD(qp_params->modify_flags,
1145 				  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1146 			qp_params->roce_mode = ROCE_V2_IPV4;
1147 			break;
1148 		}
1149 	}
1150 
1151 	for (i = 0; i < 4; i++) {
1152 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1153 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1154 	}
1155 
1156 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1157 		qp_params->vlan_id = 0;
1158 
1159 	return 0;
1160 }
1161 
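/* Sanity-check the requested QP against device limits. Only RC and GSI QPs
 * are supported, and user space may not create the special GSI QP.
 */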
1162 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1163 			       struct ib_qp_init_attr *attrs)
1164 {
1165 	struct qedr_device_attr *qattr = &dev->attr;
1166 
1167 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1168 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1169 		DP_DEBUG(dev, QEDR_MSG_QP,
1170 			 "create qp: unsupported qp type=0x%x requested\n",
1171 			 attrs->qp_type);
1172 		return -EINVAL;
1173 	}
1174 
1175 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1176 		DP_ERR(dev,
1177 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1178 		       attrs->cap.max_send_wr, qattr->max_sqe);
1179 		return -EINVAL;
1180 	}
1181 
1182 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1183 		DP_ERR(dev,
1184 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1185 		       attrs->cap.max_inline_data, qattr->max_inline);
1186 		return -EINVAL;
1187 	}
1188 
1189 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1190 		DP_ERR(dev,
1191 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1192 		       attrs->cap.max_send_sge, qattr->max_sge);
1193 		return -EINVAL;
1194 	}
1195 
1196 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1197 		DP_ERR(dev,
1198 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1199 		       attrs->cap.max_recv_sge, qattr->max_sge);
1200 		return -EINVAL;
1201 	}
1202 
1203 	/* Unprivileged user space cannot create special QP */
1204 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1205 		DP_ERR(dev,
1206 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1207 		       attrs->qp_type);
1208 		return -EINVAL;
1209 	}
1210 
1211 	return 0;
1212 }
1213 
1214 static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
1215 			       struct qedr_qp *qp)
1216 {
1217 	uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1218 	uresp->rq_icid = qp->icid;
1219 }
1220 
1221 static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
1222 			       struct qedr_qp *qp)
1223 {
1224 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1225 	uresp->sq_icid = qp->icid + 1;
1226 }
1227 
1228 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1229 			      struct qedr_qp *qp, struct ib_udata *udata)
1230 {
1231 	struct qedr_create_qp_uresp uresp;
1232 	int rc;
1233 
1234 	memset(&uresp, 0, sizeof(uresp));
1235 	qedr_copy_sq_uresp(&uresp, qp);
1236 	qedr_copy_rq_uresp(&uresp, qp);
1237 
1238 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1239 	uresp.qp_id = qp->qp_id;
1240 
1241 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1242 	if (rc)
1243 		DP_ERR(dev,
1244 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1245 		       qp->icid);
1246 
1247 	return rc;
1248 }
1249 
1250 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1251 				      struct qedr_qp *qp,
1252 				      struct qedr_pd *pd,
1253 				      struct ib_qp_init_attr *attrs)
1254 {
1255 	spin_lock_init(&qp->q_lock);
1256 	qp->pd = pd;
1257 	qp->qp_type = attrs->qp_type;
1258 	qp->max_inline_data = attrs->cap.max_inline_data;
1259 	qp->sq.max_sges = attrs->cap.max_send_sge;
1260 	qp->state = QED_ROCE_QP_STATE_RESET;
1261 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1262 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1263 	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1264 	qp->dev = dev;
1265 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1266 
1267 	DP_DEBUG(dev, QEDR_MSG_QP,
1268 		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1269 		 qp->rq.max_sges, qp->rq_cq->icid);
1270 	DP_DEBUG(dev, QEDR_MSG_QP,
1271 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1272 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1273 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1274 	DP_DEBUG(dev, QEDR_MSG_QP,
1275 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1276 		 qp->sq.max_sges, qp->sq_cq->icid);
1277 }
1278 
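/* Derive the kernel SQ/RQ doorbell addresses and doorbell data for this QP. */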
1279 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1280 {
1281 	qp->sq.db = dev->db_addr +
1282 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1283 	qp->sq.db_data.data.icid = qp->icid + 1;
1284 	qp->rq.db = dev->db_addr +
1285 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1286 	qp->rq.db_data.data.icid = qp->icid;
1287 }
1288 
1289 static inline void
1290 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1291 			      struct qedr_pd *pd,
1292 			      struct qedr_qp *qp,
1293 			      struct ib_qp_init_attr *attrs,
1294 			      bool fmr_and_reserved_lkey,
1295 			      struct qed_rdma_create_qp_in_params *params)
1296 {
1297 	/* QP handle to be written in an async event */
1298 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1299 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1300 
1301 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1302 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1303 	params->pd = pd->pd_id;
1304 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1305 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1306 	params->stats_queue = 0;
1307 	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1308 	params->srq_id = 0;
1309 	params->use_srq = false;
1310 }
1311 
1312 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1313 {
1314 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1315 		 "qp=%p. "
1316 		 "sq_addr=0x%llx, "
1317 		 "sq_len=%zd, "
1318 		 "rq_addr=0x%llx, "
1319 		 "rq_len=%zd"
1320 		 "\n",
1321 		 qp,
1322 		 qp->usq.buf_addr,
1323 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1324 }
1325 
1326 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1327 {
1328 	if (qp->usq.umem)
1329 		ib_umem_release(qp->usq.umem);
1330 	qp->usq.umem = NULL;
1331 
1332 	if (qp->urq.umem)
1333 		ib_umem_release(qp->urq.umem);
1334 	qp->urq.umem = NULL;
1335 }
1336 
1337 static int qedr_create_user_qp(struct qedr_dev *dev,
1338 			       struct qedr_qp *qp,
1339 			       struct ib_pd *ibpd,
1340 			       struct ib_udata *udata,
1341 			       struct ib_qp_init_attr *attrs)
1342 {
1343 	struct qed_rdma_create_qp_in_params in_params;
1344 	struct qed_rdma_create_qp_out_params out_params;
1345 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1346 	struct ib_ucontext *ib_ctx = NULL;
1347 	struct qedr_ucontext *ctx = NULL;
1348 	struct qedr_create_qp_ureq ureq;
1349 	int rc = -EINVAL;
1350 
1351 	ib_ctx = ibpd->uobject->context;
1352 	ctx = get_qedr_ucontext(ib_ctx);
1353 
1354 	memset(&ureq, 0, sizeof(ureq));
1355 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1356 	if (rc) {
1357 		DP_ERR(dev, "Problem copying data from user space\n");
1358 		return rc;
1359 	}
1360 
1361 	/* SQ - read access only (0), dma sync not required (0) */
1362 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1363 				  ureq.sq_len, 0, 0);
1364 	if (rc)
1365 		return rc;
1366 
1367 	/* RQ - read access only (0), dma sync not required (0) */
1368 	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1369 				  ureq.rq_len, 0, 0);
1370 
1371 	if (rc)
1372 		return rc;
1373 
1374 	memset(&in_params, 0, sizeof(in_params));
1375 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1376 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1377 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1378 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1379 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1380 	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1381 	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1382 
1383 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1384 					      &in_params, &out_params);
1385 
1386 	if (!qp->qed_qp) {
1387 		rc = -ENOMEM;
1388 		goto err1;
1389 	}
1390 
1391 	qp->qp_id = out_params.qp_id;
1392 	qp->icid = out_params.icid;
1393 
1394 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1395 	if (rc)
1396 		goto err;
1397 
1398 	qedr_qp_user_print(dev, qp);
1399 
1400 	return 0;
1401 err:
1402 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1403 	if (rc)
1404 		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1405 
1406 err1:
1407 	qedr_cleanup_user(dev, qp);
1408 	return rc;
1409 }
1410 
1411 static int
1412 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1413 			   struct qedr_qp *qp,
1414 			   struct qed_rdma_create_qp_in_params *in_params,
1415 			   u32 n_sq_elems, u32 n_rq_elems)
1416 {
1417 	struct qed_rdma_create_qp_out_params out_params;
1418 	int rc;
1419 
1420 	rc = dev->ops->common->chain_alloc(dev->cdev,
1421 					   QED_CHAIN_USE_TO_PRODUCE,
1422 					   QED_CHAIN_MODE_PBL,
1423 					   QED_CHAIN_CNT_TYPE_U32,
1424 					   n_sq_elems,
1425 					   QEDR_SQE_ELEMENT_SIZE,
1426 					   &qp->sq.pbl);
1427 
1428 	if (rc)
1429 		return rc;
1430 
1431 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1432 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1433 
1434 	rc = dev->ops->common->chain_alloc(dev->cdev,
1435 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1436 					   QED_CHAIN_MODE_PBL,
1437 					   QED_CHAIN_CNT_TYPE_U32,
1438 					   n_rq_elems,
1439 					   QEDR_RQE_ELEMENT_SIZE,
1440 					   &qp->rq.pbl);
1441 	if (rc)
1442 		return rc;
1443 
1444 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1445 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1446 
1447 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1448 					      in_params, &out_params);
1449 
1450 	if (!qp->qed_qp)
1451 		return -EINVAL;
1452 
1453 	qp->qp_id = out_params.qp_id;
1454 	qp->icid = out_params.icid;
1455 
1456 	qedr_set_roce_db_info(dev, qp);
1457 
1458 	return 0;
1459 }
1460 
1461 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1462 {
1463 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1464 	kfree(qp->wqe_wr_id);
1465 
1466 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1467 	kfree(qp->rqe_wr_id);
1468 }
1469 
1470 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1471 				 struct qedr_qp *qp,
1472 				 struct ib_pd *ibpd,
1473 				 struct ib_qp_init_attr *attrs)
1474 {
1475 	struct qed_rdma_create_qp_in_params in_params;
1476 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1477 	int rc = -EINVAL;
1478 	u32 n_rq_elems;
1479 	u32 n_sq_elems;
1480 	u32 n_sq_entries;
1481 
1482 	memset(&in_params, 0, sizeof(in_params));
1483 
1484 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1485 	 * the ring. The ring should allow at least a single WR, even if the
1486 	 * user requested none, due to allocation issues.
1487 	 * We should add an extra WR since the prod and cons indices of
1488 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1489 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1490 	 * double the number of entries due to an iSER issue that pushes far more
1491 	 * WRs than indicated. If we decline its ib_post_send() then we get
1492 	 * error prints in the dmesg we'd like to avoid.
1493 	 */
1494 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1495 			      dev->attr.max_sqe);
1496 
1497 	qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
1498 				GFP_KERNEL);
1499 	if (!qp->wqe_wr_id) {
1500 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1501 		return -ENOMEM;
1502 	}
1503 
1504 	/* QP handle to be written in CQE */
1505 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1506 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1507 
1508 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1509 	 * the ring. The ring should allow at least a single WR, even if the
1510 	 * user requested none, due to allocation issues.
1511 	 */
1512 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1513 
1514 	/* Allocate driver internal RQ array */
1515 	qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
1516 				GFP_KERNEL);
1517 	if (!qp->rqe_wr_id) {
1518 		DP_ERR(dev,
1519 		       "create qp: failed RQ shadow memory allocation\n");
1520 		kfree(qp->wqe_wr_id);
1521 		return -ENOMEM;
1522 	}
1523 
1524 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1525 
1526 	n_sq_entries = attrs->cap.max_send_wr;
1527 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1528 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1529 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1530 
1531 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1532 
1533 	rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1534 					n_sq_elems, n_rq_elems);
1535 	if (rc)
1536 		qedr_cleanup_kernel(dev, qp);
1537 
1538 	return rc;
1539 }
1540 
1541 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1542 			     struct ib_qp_init_attr *attrs,
1543 			     struct ib_udata *udata)
1544 {
1545 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1546 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1547 	struct qedr_qp *qp;
1548 	struct ib_qp *ibqp;
1549 	int rc = 0;
1550 
1551 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1552 		 udata ? "user library" : "kernel", pd);
1553 
1554 	rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1555 	if (rc)
1556 		return ERR_PTR(rc);
1557 
1558 	if (attrs->srq)
1559 		return ERR_PTR(-EINVAL);
1560 
1561 	DP_DEBUG(dev, QEDR_MSG_QP,
1562 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1563 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1564 		 get_qedr_cq(attrs->send_cq),
1565 		 get_qedr_cq(attrs->send_cq)->icid,
1566 		 get_qedr_cq(attrs->recv_cq),
1567 		 get_qedr_cq(attrs->recv_cq)->icid);
1568 
1569 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1570 	if (!qp) {
1571 		DP_ERR(dev, "create qp: failed allocating memory\n");
1572 		return ERR_PTR(-ENOMEM);
1573 	}
1574 
1575 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1576 
1577 	if (attrs->qp_type == IB_QPT_GSI) {
1578 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1579 		if (IS_ERR(ibqp))
1580 			kfree(qp);
1581 		return ibqp;
1582 	}
1583 
1584 	if (udata)
1585 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1586 	else
1587 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1588 
1589 	if (rc)
1590 		goto err;
1591 
1592 	qp->ibqp.qp_num = qp->qp_id;
1593 
1594 	return &qp->ibqp;
1595 
1596 err:
1597 	kfree(qp);
1598 
1599 	return ERR_PTR(-EFAULT);
1600 }
1601 
1602 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1603 {
1604 	switch (qp_state) {
1605 	case QED_ROCE_QP_STATE_RESET:
1606 		return IB_QPS_RESET;
1607 	case QED_ROCE_QP_STATE_INIT:
1608 		return IB_QPS_INIT;
1609 	case QED_ROCE_QP_STATE_RTR:
1610 		return IB_QPS_RTR;
1611 	case QED_ROCE_QP_STATE_RTS:
1612 		return IB_QPS_RTS;
1613 	case QED_ROCE_QP_STATE_SQD:
1614 		return IB_QPS_SQD;
1615 	case QED_ROCE_QP_STATE_ERR:
1616 		return IB_QPS_ERR;
1617 	case QED_ROCE_QP_STATE_SQE:
1618 		return IB_QPS_SQE;
1619 	}
1620 	return IB_QPS_ERR;
1621 }
1622 
1623 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1624 					enum ib_qp_state qp_state)
1625 {
1626 	switch (qp_state) {
1627 	case IB_QPS_RESET:
1628 		return QED_ROCE_QP_STATE_RESET;
1629 	case IB_QPS_INIT:
1630 		return QED_ROCE_QP_STATE_INIT;
1631 	case IB_QPS_RTR:
1632 		return QED_ROCE_QP_STATE_RTR;
1633 	case IB_QPS_RTS:
1634 		return QED_ROCE_QP_STATE_RTS;
1635 	case IB_QPS_SQD:
1636 		return QED_ROCE_QP_STATE_SQD;
1637 	case IB_QPS_ERR:
1638 		return QED_ROCE_QP_STATE_ERR;
1639 	default:
1640 		return QED_ROCE_QP_STATE_ERR;
1641 	}
1642 }
1643 
1644 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1645 {
1646 	qed_chain_reset(&qph->pbl);
1647 	qph->prod = 0;
1648 	qph->cons = 0;
1649 	qph->wqe_cons = 0;
1650 	qph->db_data.data.value = cpu_to_le16(0);
1651 }
1652 
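/* Validate the requested RoCE QP state transition against the current state
 * and perform any side effects (e.g. ringing the RQ doorbell on INIT->RTR).
 */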
1653 static int qedr_update_qp_state(struct qedr_dev *dev,
1654 				struct qedr_qp *qp,
1655 				enum qed_roce_qp_state new_state)
1656 {
1657 	int status = 0;
1658 
1659 	if (new_state == qp->state)
1660 		return 0;
1661 
1662 	switch (qp->state) {
1663 	case QED_ROCE_QP_STATE_RESET:
1664 		switch (new_state) {
1665 		case QED_ROCE_QP_STATE_INIT:
1666 			qp->prev_wqe_size = 0;
1667 			qedr_reset_qp_hwq_info(&qp->sq);
1668 			qedr_reset_qp_hwq_info(&qp->rq);
1669 			break;
1670 		default:
1671 			status = -EINVAL;
1672 			break;
1673 		}
1674 		break;
1675 	case QED_ROCE_QP_STATE_INIT:
1676 		switch (new_state) {
1677 		case QED_ROCE_QP_STATE_RTR:
1678 			/* Update doorbell (in case post_recv was
1679 			 * done before move to RTR)
1680 			 */
1681 			wmb();
1682 			writel(qp->rq.db_data.raw, qp->rq.db);
1683 			/* Make sure write takes effect */
1684 			mmiowb();
1685 			break;
1686 		case QED_ROCE_QP_STATE_ERR:
1687 			break;
1688 		default:
1689 			/* Invalid state change. */
1690 			status = -EINVAL;
1691 			break;
1692 		}
1693 		break;
1694 	case QED_ROCE_QP_STATE_RTR:
1695 		/* RTR->XXX */
1696 		switch (new_state) {
1697 		case QED_ROCE_QP_STATE_RTS:
1698 			break;
1699 		case QED_ROCE_QP_STATE_ERR:
1700 			break;
1701 		default:
1702 			/* Invalid state change. */
1703 			status = -EINVAL;
1704 			break;
1705 		}
1706 		break;
1707 	case QED_ROCE_QP_STATE_RTS:
1708 		/* RTS->XXX */
1709 		switch (new_state) {
1710 		case QED_ROCE_QP_STATE_SQD:
1711 			break;
1712 		case QED_ROCE_QP_STATE_ERR:
1713 			break;
1714 		default:
1715 			/* Invalid state change. */
1716 			status = -EINVAL;
1717 			break;
1718 		}
1719 		break;
1720 	case QED_ROCE_QP_STATE_SQD:
1721 		/* SQD->XXX */
1722 		switch (new_state) {
1723 		case QED_ROCE_QP_STATE_RTS:
1724 		case QED_ROCE_QP_STATE_ERR:
1725 			break;
1726 		default:
1727 			/* Invalid state change. */
1728 			status = -EINVAL;
1729 			break;
1730 		}
1731 		break;
1732 	case QED_ROCE_QP_STATE_ERR:
1733 		/* ERR->XXX */
1734 		switch (new_state) {
1735 		case QED_ROCE_QP_STATE_RESET:
1736 			if ((qp->rq.prod != qp->rq.cons) ||
1737 			    (qp->sq.prod != qp->sq.cons)) {
1738 				DP_NOTICE(dev,
1739 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1740 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
1741 					  qp->sq.cons);
1742 				status = -EINVAL;
1743 			}
1744 			break;
1745 		default:
1746 			status = -EINVAL;
1747 			break;
1748 		}
1749 		break;
1750 	default:
1751 		status = -EINVAL;
1752 		break;
1753 	}
1754 
1755 	return status;
1756 }
1757 
1758 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1759 		   int attr_mask, struct ib_udata *udata)
1760 {
1761 	struct qedr_qp *qp = get_qedr_qp(ibqp);
1762 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1763 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1764 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1765 	enum ib_qp_state old_qp_state, new_qp_state;
1766 	int rc = 0;
1767 
1768 	DP_DEBUG(dev, QEDR_MSG_QP,
1769 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
1770 		 attr->qp_state);
1771 
1772 	old_qp_state = qedr_get_ibqp_state(qp->state);
1773 	if (attr_mask & IB_QP_STATE)
1774 		new_qp_state = attr->qp_state;
1775 	else
1776 		new_qp_state = old_qp_state;
1777 
1778 	if (!ib_modify_qp_is_ok
1779 	    (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
1780 	     IB_LINK_LAYER_ETHERNET)) {
1781 		DP_ERR(dev,
1782 		       "modify qp: invalid attribute mask=0x%x specified for\n"
1783 		       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1784 		       attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
1785 		       new_qp_state);
1786 		rc = -EINVAL;
1787 		goto err;
1788 	}
1789 
1790 	/* Translate the masks... */
1791 	if (attr_mask & IB_QP_STATE) {
1792 		SET_FIELD(qp_params.modify_flags,
1793 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1794 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1795 	}
1796 
1797 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1798 		qp_params.sqd_async = true;
1799 
1800 	if (attr_mask & IB_QP_PKEY_INDEX) {
1801 		SET_FIELD(qp_params.modify_flags,
1802 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1803 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1804 			rc = -EINVAL;
1805 			goto err;
1806 		}
1807 
1808 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1809 	}
1810 
1811 	if (attr_mask & IB_QP_QKEY)
1812 		qp->qkey = attr->qkey;
1813 
1814 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1815 		SET_FIELD(qp_params.modify_flags,
1816 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1817 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1818 						  IB_ACCESS_REMOTE_READ;
1819 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1820 						   IB_ACCESS_REMOTE_WRITE;
1821 		qp_params.incoming_atomic_en = attr->qp_access_flags &
1822 					       IB_ACCESS_REMOTE_ATOMIC;
1823 	}
1824 
1825 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1826 		if (attr_mask & IB_QP_PATH_MTU) {
1827 			if (attr->path_mtu < IB_MTU_256 ||
1828 			    attr->path_mtu > IB_MTU_4096) {
1829 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1830 				rc = -EINVAL;
1831 				goto err;
1832 			}
1833 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1834 				      ib_mtu_enum_to_int(iboe_get_mtu
1835 							 (dev->ndev->mtu)));
1836 		}
1837 
1838 		if (!qp->mtu) {
1839 			qp->mtu =
1840 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1841 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1842 		}
1843 
1844 		SET_FIELD(qp_params.modify_flags,
1845 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1846 
1847 		qp_params.traffic_class_tos = grh->traffic_class;
1848 		qp_params.flow_label = grh->flow_label;
1849 		qp_params.hop_limit_ttl = grh->hop_limit;
1850 
1851 		qp->sgid_idx = grh->sgid_index;
1852 
1853 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1854 		if (rc) {
1855 			DP_ERR(dev,
1856 			       "modify qp: problems with GID index %d (rc=%d)\n",
1857 			       grh->sgid_index, rc);
1858 			return rc;
1859 		}
1860 
1861 		rc = qedr_get_dmac(dev, &attr->ah_attr,
1862 				   qp_params.remote_mac_addr);
1863 		if (rc)
1864 			return rc;
1865 
1866 		qp_params.use_local_mac = true;
1867 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
1868 
1869 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
1870 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
1871 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
1872 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
1873 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
1874 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
1875 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
1876 			 qp_params.remote_mac_addr);
1877 
1878 		qp_params.mtu = qp->mtu;
1879 		qp_params.lb_indication = false;
1880 	}
1881 
1882 	if (!qp_params.mtu) {
1883 		/* Stay with current MTU */
1884 		if (qp->mtu)
1885 			qp_params.mtu = qp->mtu;
1886 		else
1887 			qp_params.mtu =
1888 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1889 	}
1890 
1891 	if (attr_mask & IB_QP_TIMEOUT) {
1892 		SET_FIELD(qp_params.modify_flags,
1893 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
1894 
1895 		qp_params.ack_timeout = attr->timeout;
1896 		if (attr->timeout) {
1897 			u32 temp;
1898 
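			/* IB encodes the ack timeout as 4.096 usec * 2^timeout;
			 * convert the resulting nanosecond value to milliseconds
			 * for the FW, e.g. timeout == 14 -> 4096 * 2^14 ns ~= 67 msec.
			 */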
1899 			temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
1900 			/* FW requires [msec] */
1901 			qp_params.ack_timeout = temp;
1902 		} else {
1903 			/* Infinite */
1904 			qp_params.ack_timeout = 0;
1905 		}
1906 	}
1907 	if (attr_mask & IB_QP_RETRY_CNT) {
1908 		SET_FIELD(qp_params.modify_flags,
1909 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
1910 		qp_params.retry_cnt = attr->retry_cnt;
1911 	}
1912 
1913 	if (attr_mask & IB_QP_RNR_RETRY) {
1914 		SET_FIELD(qp_params.modify_flags,
1915 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
1916 		qp_params.rnr_retry_cnt = attr->rnr_retry;
1917 	}
1918 
1919 	if (attr_mask & IB_QP_RQ_PSN) {
1920 		SET_FIELD(qp_params.modify_flags,
1921 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
1922 		qp_params.rq_psn = attr->rq_psn;
1923 		qp->rq_psn = attr->rq_psn;
1924 	}
1925 
1926 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1927 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
1928 			rc = -EINVAL;
1929 			DP_ERR(dev,
1930 			       "unsupported max_rd_atomic=%d, supported=%d\n",
1931 			       attr->max_rd_atomic,
1932 			       dev->attr.max_qp_req_rd_atomic_resc);
1933 			goto err;
1934 		}
1935 
1936 		SET_FIELD(qp_params.modify_flags,
1937 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
1938 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
1939 	}
1940 
1941 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1942 		SET_FIELD(qp_params.modify_flags,
1943 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
1944 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
1945 	}
1946 
1947 	if (attr_mask & IB_QP_SQ_PSN) {
1948 		SET_FIELD(qp_params.modify_flags,
1949 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
1950 		qp_params.sq_psn = attr->sq_psn;
1951 		qp->sq_psn = attr->sq_psn;
1952 	}
1953 
1954 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1955 		if (attr->max_dest_rd_atomic >
1956 		    dev->attr.max_qp_resp_rd_atomic_resc) {
1957 			DP_ERR(dev,
1958 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
1959 			       attr->max_dest_rd_atomic,
1960 			       dev->attr.max_qp_resp_rd_atomic_resc);
1961 
1962 			rc = -EINVAL;
1963 			goto err;
1964 		}
1965 
1966 		SET_FIELD(qp_params.modify_flags,
1967 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
1968 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
1969 	}
1970 
1971 	if (attr_mask & IB_QP_DEST_QPN) {
1972 		SET_FIELD(qp_params.modify_flags,
1973 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
1974 
1975 		qp_params.dest_qp = attr->dest_qp_num;
1976 		qp->dest_qp_num = attr->dest_qp_num;
1977 	}
1978 
1979 	if (qp->qp_type != IB_QPT_GSI)
1980 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
1981 					      qp->qed_qp, &qp_params);
1982 
1983 	if (attr_mask & IB_QP_STATE) {
1984 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
1985 			rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
1986 		qp->state = qp_params.new_state;
1987 	}
1988 
1989 err:
1990 	return rc;
1991 }
1992 
1993 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
1994 {
1995 	int ib_qp_acc_flags = 0;
1996 
1997 	if (params->incoming_rdma_write_en)
1998 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1999 	if (params->incoming_rdma_read_en)
2000 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2001 	if (params->incoming_atomic_en)
2002 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2003 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2004 	return ib_qp_acc_flags;
2005 }
2006 
2007 int qedr_query_qp(struct ib_qp *ibqp,
2008 		  struct ib_qp_attr *qp_attr,
2009 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2010 {
2011 	struct qed_rdma_query_qp_out_params params;
2012 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2013 	struct qedr_dev *dev = qp->dev;
2014 	int rc = 0;
2015 
2016 	memset(&params, 0, sizeof(params));
2017 
2018 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2019 	if (rc)
2020 		goto err;
2021 
2022 	memset(qp_attr, 0, sizeof(*qp_attr));
2023 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2024 
2025 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2026 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2027 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2028 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2029 	qp_attr->rq_psn = params.rq_psn;
2030 	qp_attr->sq_psn = params.sq_psn;
2031 	qp_attr->dest_qp_num = params.dest_qp;
2032 
2033 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2034 
2035 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2036 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2037 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2038 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2039 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2040 	qp_init_attr->cap = qp_attr->cap;
2041 
2042 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2043 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2044 			params.flow_label, qp->sgid_idx,
2045 			params.hop_limit_ttl, params.traffic_class_tos);
2046 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2047 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2048 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2049 	qp_attr->timeout = params.timeout;
2050 	qp_attr->rnr_retry = params.rnr_retry;
2051 	qp_attr->retry_cnt = params.retry_cnt;
2052 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2053 	qp_attr->pkey_index = params.pkey_index;
2054 	qp_attr->port_num = 1;
2055 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2056 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2057 	qp_attr->alt_pkey_index = 0;
2058 	qp_attr->alt_port_num = 0;
2059 	qp_attr->alt_timeout = 0;
2060 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2061 
2062 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2063 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2064 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2065 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2066 
2067 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2068 		 qp_attr->cap.max_inline_data);
2069 
2070 err:
2071 	return rc;
2072 }
2073 
2074 int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2075 {
2076 	int rc = 0;
2077 
2078 	if (qp->qp_type != IB_QPT_GSI) {
2079 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2080 		if (rc)
2081 			return rc;
2082 	}
2083 
2084 	if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2085 		qedr_cleanup_user(dev, qp);
2086 	else
2087 		qedr_cleanup_kernel(dev, qp);
2088 
2089 	return 0;
2090 }
2091 
2092 int qedr_destroy_qp(struct ib_qp *ibqp)
2093 {
2094 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2095 	struct qedr_dev *dev = qp->dev;
2096 	struct ib_qp_attr attr;
2097 	int attr_mask = 0;
2098 	int rc = 0;
2099 
2100 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2101 		 qp, qp->qp_type);
2102 
2103 	if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2104 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2105 	    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2106 
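		/* Move the QP to the error state first so that any outstanding
		 * work requests get flushed before the QP is destroyed.
		 */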
2107 		attr.qp_state = IB_QPS_ERR;
2108 		attr_mask |= IB_QP_STATE;
2109 
2110 		/* Change the QP state to ERROR */
2111 		qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2112 	}
2113 
2114 	if (qp->qp_type == IB_QPT_GSI)
2115 		qedr_destroy_gsi_qp(dev);
2116 
2117 	qedr_free_qp_resources(dev, qp);
2118 
2119 	kfree(qp);
2120 
2121 	return rc;
2122 }
2123 
2124 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2125 			     struct ib_udata *udata)
2126 {
2127 	struct qedr_ah *ah;
2128 
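	/* create_ah may be invoked from atomic context (e.g. by the connection
	 * manager), so a non-sleeping allocation is used.
	 */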
2129 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2130 	if (!ah)
2131 		return ERR_PTR(-ENOMEM);
2132 
2133 	ah->attr = *attr;
2134 
2135 	return &ah->ibah;
2136 }
2137 
2138 int qedr_destroy_ah(struct ib_ah *ibah)
2139 {
2140 	struct qedr_ah *ah = get_qedr_ah(ibah);
2141 
2142 	kfree(ah);
2143 	return 0;
2144 }
2145 
2146 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2147 {
2148 	struct qedr_pbl *pbl, *tmp;
2149 
2150 	if (info->pbl_table)
2151 		list_add_tail(&info->pbl_table->list_entry,
2152 			      &info->free_pbl_list);
2153 
2154 	if (!list_empty(&info->inuse_pbl_list))
2155 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2156 
2157 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2158 		list_del(&pbl->list_entry);
2159 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2160 	}
2161 }
2162 
2163 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2164 			size_t page_list_len, bool two_layered)
2165 {
2166 	struct qedr_pbl *tmp;
2167 	int rc;
2168 
2169 	INIT_LIST_HEAD(&info->free_pbl_list);
2170 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2171 
2172 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2173 				  page_list_len, two_layered);
2174 	if (rc)
2175 		goto done;
2176 
2177 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2178 	if (IS_ERR(info->pbl_table)) {
2179 		rc = PTR_ERR(info->pbl_table);
2180 		goto done;
2181 	}
2182 
2183 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2184 		 &info->pbl_table->pa);
2185 
2186 	/* In the usual case we use 2 PBLs, so we add one to the free
2187 	 * list and allocate another one
2188 	 */
2189 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2190 	if (IS_ERR(tmp)) {
2191 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2192 		goto done;
2193 	}
2194 
2195 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2196 
2197 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2198 
2199 done:
2200 	if (rc)
2201 		free_mr_info(dev, info);
2202 
2203 	return rc;
2204 }
2205 
2206 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2207 			       u64 usr_addr, int acc, struct ib_udata *udata)
2208 {
2209 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2210 	struct qedr_mr *mr;
2211 	struct qedr_pd *pd;
2212 	int rc = -ENOMEM;
2213 
2214 	pd = get_qedr_pd(ibpd);
2215 	DP_DEBUG(dev, QEDR_MSG_MR,
2216 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2217 		 pd->pd_id, start, len, usr_addr, acc);
2218 
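	/* Remote write access requires local write access as well (IB spec). */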
2219 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2220 		return ERR_PTR(-EINVAL);
2221 
2222 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2223 	if (!mr)
2224 		return ERR_PTR(rc);
2225 
2226 	mr->type = QEDR_MR_USER;
2227 
2228 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2229 	if (IS_ERR(mr->umem)) {
2230 		rc = -EFAULT;
2231 		goto err0;
2232 	}
2233 
2234 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2235 	if (rc)
2236 		goto err1;
2237 
2238 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2239 			   &mr->info.pbl_info, mr->umem->page_shift);
2240 
2241 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2242 	if (rc) {
2243 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2244 		goto err1;
2245 	}
2246 
2247 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2248 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2249 	mr->hw_mr.key = 0;
2250 	mr->hw_mr.pd = pd->pd_id;
2251 	mr->hw_mr.local_read = 1;
2252 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2253 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2254 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2255 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2256 	mr->hw_mr.mw_bind = false;
2257 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2258 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2259 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2260 	mr->hw_mr.page_size_log = mr->umem->page_shift;
2261 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2262 	mr->hw_mr.length = len;
2263 	mr->hw_mr.vaddr = usr_addr;
2264 	mr->hw_mr.zbva = false;
2265 	mr->hw_mr.phy_mr = false;
2266 	mr->hw_mr.dma_mr = false;
2267 
2268 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2269 	if (rc) {
2270 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2271 		goto err2;
2272 	}
2273 
2274 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2275 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2276 	    mr->hw_mr.remote_atomic)
2277 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2278 
2279 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2280 		 mr->ibmr.lkey);
2281 	return &mr->ibmr;
2282 
2283 err2:
2284 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2285 err1:
2286 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2287 err0:
2288 	kfree(mr);
2289 	return ERR_PTR(rc);
2290 }
2291 
2292 int qedr_dereg_mr(struct ib_mr *ib_mr)
2293 {
2294 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2295 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2296 	int rc = 0;
2297 
2298 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2299 	if (rc)
2300 		return rc;
2301 
2302 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2303 
2304 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2305 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2306 
2307 	/* It could be user-registered memory. */
2308 	if (mr->umem)
2309 		ib_umem_release(mr->umem);
2310 
2311 	kfree(mr);
2312 
2313 	return rc;
2314 }
2315 
2316 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2317 				       int max_page_list_len)
2318 {
2319 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2320 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2321 	struct qedr_mr *mr;
2322 	int rc = -ENOMEM;
2323 
2324 	DP_DEBUG(dev, QEDR_MSG_MR,
2325 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2326 		 max_page_list_len);
2327 
2328 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2329 	if (!mr)
2330 		return ERR_PTR(rc);
2331 
2332 	mr->dev = dev;
2333 	mr->type = QEDR_MR_FRMR;
2334 
2335 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2336 	if (rc)
2337 		goto err0;
2338 
2339 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2340 	if (rc) {
2341 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2342 		goto err0;
2343 	}
2344 
2345 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2346 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2347 	mr->hw_mr.key = 0;
2348 	mr->hw_mr.pd = pd->pd_id;
2349 	mr->hw_mr.local_read = 1;
2350 	mr->hw_mr.local_write = 0;
2351 	mr->hw_mr.remote_read = 0;
2352 	mr->hw_mr.remote_write = 0;
2353 	mr->hw_mr.remote_atomic = 0;
2354 	mr->hw_mr.mw_bind = false;
2355 	mr->hw_mr.pbl_ptr = 0;
2356 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2357 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2358 	mr->hw_mr.fbo = 0;
2359 	mr->hw_mr.length = 0;
2360 	mr->hw_mr.vaddr = 0;
2361 	mr->hw_mr.zbva = false;
2362 	mr->hw_mr.phy_mr = true;
2363 	mr->hw_mr.dma_mr = false;
2364 
2365 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2366 	if (rc) {
2367 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2368 		goto err1;
2369 	}
2370 
2371 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2372 	mr->ibmr.rkey = mr->ibmr.lkey;
2373 
2374 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2375 	return mr;
2376 
2377 err1:
2378 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2379 err0:
2380 	kfree(mr);
2381 	return ERR_PTR(rc);
2382 }
2383 
2384 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2385 			    enum ib_mr_type mr_type, u32 max_num_sg)
2386 {
2387 	struct qedr_dev *dev;
2388 	struct qedr_mr *mr;
2389 
2390 	if (mr_type != IB_MR_TYPE_MEM_REG)
2391 		return ERR_PTR(-EINVAL);
2392 
2393 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2394 
2395 	if (IS_ERR(mr))
2396 		return ERR_PTR(-EINVAL);
2397 
2398 	dev = mr->dev;
2399 
2400 	return &mr->ibmr;
2401 }
2402 
2403 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2404 {
2405 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2406 	struct qedr_pbl *pbl_table;
2407 	struct regpair *pbe;
2408 	u32 pbes_in_page;
2409 
2410 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2411 		DP_ERR(mr->dev, "qedr_set_page failed, npages=%d\n", mr->npages);
2412 		return -ENOMEM;
2413 	}
2414 
2415 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2416 		 mr->npages, addr);
2417 
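	/* Locate the PBL page that holds this entry and the 64-bit PBE slot
	 * within it, then store the DMA address as two little-endian words.
	 */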
2418 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2419 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2420 	pbe = (struct regpair *)pbl_table->va;
2421 	pbe +=  mr->npages % pbes_in_page;
2422 	pbe->lo = cpu_to_le32((u32)addr);
2423 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2424 
2425 	mr->npages++;
2426 
2427 	return 0;
2428 }
2429 
2430 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2431 {
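	/* Number of FMR completions seen since the last reclaim; the most
	 * recent completion is intentionally left out (the -1), likely because
	 * its PBL may still be in use.
	 */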
2432 	int work = info->completed - info->completed_handled - 1;
2433 
2434 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2435 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2436 		struct qedr_pbl *pbl;
2437 
2438 		/* Free all the page lists that can be freed (all the ones
2439 		 * that were invalidated), under the assumption that if an FMR
2440 		 * completed successfully, any invalidate operation posted
2441 		 * before it has completed as well.
2442 		 */
2443 		pbl = list_first_entry(&info->inuse_pbl_list,
2444 				       struct qedr_pbl, list_entry);
2445 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2446 		info->completed_handled++;
2447 	}
2448 }
2449 
2450 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2451 		   int sg_nents, unsigned int *sg_offset)
2452 {
2453 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2454 
2455 	mr->npages = 0;
2456 
2457 	handle_completed_mrs(mr->dev, &mr->info);
2458 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2459 }
2460 
2461 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2462 {
2463 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2464 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2465 	struct qedr_mr *mr;
2466 	int rc;
2467 
2468 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2469 	if (!mr)
2470 		return ERR_PTR(-ENOMEM);
2471 
2472 	mr->type = QEDR_MR_DMA;
2473 
2474 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2475 	if (rc) {
2476 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2477 		goto err1;
2478 	}
2479 
2480 	/* index only, 18 bit long, lkey = itid << 8 | key */
2481 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2482 	mr->hw_mr.pd = pd->pd_id;
2483 	mr->hw_mr.local_read = 1;
2484 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2485 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2486 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2487 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2488 	mr->hw_mr.dma_mr = true;
2489 
2490 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2491 	if (rc) {
2492 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2493 		goto err2;
2494 	}
2495 
2496 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2497 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2498 	    mr->hw_mr.remote_atomic)
2499 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2500 
2501 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2502 	return &mr->ibmr;
2503 
2504 err2:
2505 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2506 err1:
2507 	kfree(mr);
2508 	return ERR_PTR(rc);
2509 }
2510 
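/* One slot is left unused so that a full queue ((prod + 1) % max_wr == cons)
 * can be distinguished from an empty one (prod == cons).
 */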
2511 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2512 {
2513 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2514 }
2515 
2516 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2517 {
2518 	int i, len = 0;
2519 
2520 	for (i = 0; i < num_sge; i++)
2521 		len += sg_list[i].length;
2522 
2523 	return len;
2524 }
2525 
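/* Swap the byte order of each 64-bit word in a WQE inline data segment. */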
2526 static void swap_wqe_data64(u64 *p)
2527 {
2528 	int i;
2529 
2530 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2531 		*p = cpu_to_be64(cpu_to_le64(*p));
2532 }
2533 
2534 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2535 				       struct qedr_qp *qp, u8 *wqe_size,
2536 				       struct ib_send_wr *wr,
2537 				       struct ib_send_wr **bad_wr, u8 *bits,
2538 				       u8 bit)
2539 {
2540 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2541 	char *seg_prt, *wqe;
2542 	int i, seg_siz;
2543 
2544 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2545 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2546 		*bad_wr = wr;
2547 		return 0;
2548 	}
2549 
2550 	if (!data_size)
2551 		return data_size;
2552 
2553 	*bits |= bit;
2554 
2555 	seg_prt = NULL;
2556 	wqe = NULL;
2557 	seg_siz = 0;
2558 
2559 	/* Copy data inline */
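	/* The payload is packed into successive elements produced from the SQ
	 * chain, each sizeof(struct rdma_sq_common_wqe) bytes long; wqe_size is
	 * incremented for every element consumed.
	 */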
2560 	for (i = 0; i < wr->num_sge; i++) {
2561 		u32 len = wr->sg_list[i].length;
2562 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2563 
2564 		while (len > 0) {
2565 			u32 cur;
2566 
2567 			/* New segment required */
2568 			if (!seg_siz) {
2569 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2570 				seg_prt = wqe;
2571 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2572 				(*wqe_size)++;
2573 			}
2574 
2575 			/* Calculate currently allowed length */
2576 			cur = min_t(u32, len, seg_siz);
2577 			memcpy(seg_prt, src, cur);
2578 
2579 			/* Update segment variables */
2580 			seg_prt += cur;
2581 			seg_siz -= cur;
2582 
2583 			/* Update sge variables */
2584 			src += cur;
2585 			len -= cur;
2586 
2587 			/* Swap fully-completed segments */
2588 			if (!seg_siz)
2589 				swap_wqe_data64((u64 *)wqe);
2590 		}
2591 	}
2592 
2593 	/* Swap the last segment if it was not fully completed */
2594 	if (seg_siz)
2595 		swap_wqe_data64((u64 *)wqe);
2596 
2597 	return data_size;
2598 }
2599 
2600 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
2601 	do {							\
2602 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2603 		(sge)->length = cpu_to_le32(vlength);		\
2604 		(sge)->flags = cpu_to_le32(vflags);		\
2605 	} while (0)
2606 
2607 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
2608 	do {							\
2609 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
2610 		(hdr)->num_sges = num_sge;			\
2611 	} while (0)
2612 
2613 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
2614 	do {							\
2615 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
2616 		(sge)->length = cpu_to_le32(vlength);		\
2617 		(sge)->l_key = cpu_to_le32(vlkey);		\
2618 	} while (0)
2619 
2620 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2621 				struct ib_send_wr *wr)
2622 {
2623 	u32 data_size = 0;
2624 	int i;
2625 
2626 	for (i = 0; i < wr->num_sge; i++) {
2627 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2628 
2629 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2630 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2631 		sge->length = cpu_to_le32(wr->sg_list[i].length);
2632 		data_size += wr->sg_list[i].length;
2633 	}
2634 
2635 	if (wqe_size)
2636 		*wqe_size += wr->num_sge;
2637 
2638 	return data_size;
2639 }
2640 
2641 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2642 				     struct qedr_qp *qp,
2643 				     struct rdma_sq_rdma_wqe_1st *rwqe,
2644 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
2645 				     struct ib_send_wr *wr,
2646 				     struct ib_send_wr **bad_wr)
2647 {
2648 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2649 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2650 
2651 	if (wr->send_flags & IB_SEND_INLINE &&
2652 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2653 	     wr->opcode == IB_WR_RDMA_WRITE)) {
2654 		u8 flags = 0;
2655 
2656 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2657 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2658 						   bad_wr, &rwqe->flags, flags);
2659 	}
2660 
2661 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2662 }
2663 
2664 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2665 				     struct qedr_qp *qp,
2666 				     struct rdma_sq_send_wqe_1st *swqe,
2667 				     struct rdma_sq_send_wqe_2st *swqe2,
2668 				     struct ib_send_wr *wr,
2669 				     struct ib_send_wr **bad_wr)
2670 {
2671 	memset(swqe2, 0, sizeof(*swqe2));
2672 	if (wr->send_flags & IB_SEND_INLINE) {
2673 		u8 flags = 0;
2674 
2675 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2676 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2677 						   bad_wr, &swqe->flags, flags);
2678 	}
2679 
2680 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2681 }
2682 
2683 static int qedr_prepare_reg(struct qedr_qp *qp,
2684 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
2685 			    struct ib_reg_wr *wr)
2686 {
2687 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
2688 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
2689 
2690 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2691 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2692 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2693 	fwqe1->l_key = wr->key;
2694 
2695 	fwqe2->access_ctrl = 0;
2696 
2697 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2698 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
2699 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2700 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2701 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2702 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2703 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2704 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2705 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2706 	fwqe2->fmr_ctrl = 0;
2707 
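	/* PAGE_SIZE_LOG is taken relative to a 4K page:
	 * ilog2(page_size) - 12 == log2(page_size / 4096).
	 */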
2708 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2709 		   ilog2(mr->ibmr.page_size) - 12);
2710 
2711 	fwqe2->length_hi = 0;
2712 	fwqe2->length_lo = mr->ibmr.length;
2713 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2714 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2715 
2716 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
2717 
2718 	return 0;
2719 }
2720 
2721 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2722 {
2723 	switch (opcode) {
2724 	case IB_WR_RDMA_WRITE:
2725 	case IB_WR_RDMA_WRITE_WITH_IMM:
2726 		return IB_WC_RDMA_WRITE;
2727 	case IB_WR_SEND_WITH_IMM:
2728 	case IB_WR_SEND:
2729 	case IB_WR_SEND_WITH_INV:
2730 		return IB_WC_SEND;
2731 	case IB_WR_RDMA_READ:
2732 		return IB_WC_RDMA_READ;
2733 	case IB_WR_ATOMIC_CMP_AND_SWP:
2734 		return IB_WC_COMP_SWAP;
2735 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2736 		return IB_WC_FETCH_ADD;
2737 	case IB_WR_REG_MR:
2738 		return IB_WC_REG_MR;
2739 	case IB_WR_LOCAL_INV:
2740 		return IB_WC_LOCAL_INV;
2741 	default:
2742 		return IB_WC_SEND;
2743 	}
2744 }
2745 
2746 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2747 {
2748 	int wq_is_full, err_wr, pbl_is_full;
2749 	struct qedr_dev *dev = qp->dev;
2750 
2751 	/* prevent SQ overflow and/or processing of a bad WR */
2752 	err_wr = wr->num_sge > qp->sq.max_sges;
2753 	wq_is_full = qedr_wq_is_full(&qp->sq);
2754 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2755 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2756 	if (wq_is_full || err_wr || pbl_is_full) {
2757 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2758 			DP_ERR(dev,
2759 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2760 			       qp);
2761 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2762 		}
2763 
2764 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2765 			DP_ERR(dev,
2766 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2767 			       qp);
2768 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2769 		}
2770 
2771 		if (pbl_is_full &&
2772 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2773 			DP_ERR(dev,
2774 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2775 			       qp);
2776 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2777 		}
2778 		return false;
2779 	}
2780 	return true;
2781 }
2782 
2783 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2784 		     struct ib_send_wr **bad_wr)
2785 {
2786 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2787 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2788 	struct rdma_sq_atomic_wqe_1st *awqe1;
2789 	struct rdma_sq_atomic_wqe_2nd *awqe2;
2790 	struct rdma_sq_atomic_wqe_3rd *awqe3;
2791 	struct rdma_sq_send_wqe_2st *swqe2;
2792 	struct rdma_sq_local_inv_wqe *iwqe;
2793 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
2794 	struct rdma_sq_send_wqe_1st *swqe;
2795 	struct rdma_sq_rdma_wqe_1st *rwqe;
2796 	struct rdma_sq_fmr_wqe_1st *fwqe1;
2797 	struct rdma_sq_common_wqe *wqe;
2798 	u32 length;
2799 	int rc = 0;
2800 	bool comp;
2801 
2802 	if (!qedr_can_post_send(qp, wr)) {
2803 		*bad_wr = wr;
2804 		return -ENOMEM;
2805 	}
2806 
2807 	wqe = qed_chain_produce(&qp->sq.pbl);
2808 	qp->wqe_wr_id[qp->sq.prod].signaled =
2809 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2810 
2811 	wqe->flags = 0;
2812 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2813 		   !!(wr->send_flags & IB_SEND_SOLICITED));
2814 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2815 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2816 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2817 		   !!(wr->send_flags & IB_SEND_FENCE));
2818 	wqe->prev_wqe_size = qp->prev_wqe_size;
2819 
2820 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2821 
2822 	switch (wr->opcode) {
2823 	case IB_WR_SEND_WITH_IMM:
2824 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
2825 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2826 		swqe->wqe_size = 2;
2827 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2828 
2829 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
2830 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2831 						   wr, bad_wr);
2832 		swqe->length = cpu_to_le32(length);
2833 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2834 		qp->prev_wqe_size = swqe->wqe_size;
2835 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2836 		break;
2837 	case IB_WR_SEND:
2838 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
2839 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2840 
2841 		swqe->wqe_size = 2;
2842 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2843 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2844 						   wr, bad_wr);
2845 		swqe->length = cpu_to_le32(length);
2846 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2847 		qp->prev_wqe_size = swqe->wqe_size;
2848 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2849 		break;
2850 	case IB_WR_SEND_WITH_INV:
2851 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
2852 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2853 		swqe2 = qed_chain_produce(&qp->sq.pbl);
2854 		swqe->wqe_size = 2;
2855 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
2856 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2857 						   wr, bad_wr);
2858 		swqe->length = cpu_to_le32(length);
2859 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2860 		qp->prev_wqe_size = swqe->wqe_size;
2861 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2862 		break;
2863 
2864 	case IB_WR_RDMA_WRITE_WITH_IMM:
2865 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
2866 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2867 
2868 		rwqe->wqe_size = 2;
2869 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
2870 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2871 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2872 						   wr, bad_wr);
2873 		rwqe->length = cpu_to_le32(length);
2874 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2875 		qp->prev_wqe_size = rwqe->wqe_size;
2876 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2877 		break;
2878 	case IB_WR_RDMA_WRITE:
2879 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
2880 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2881 
2882 		rwqe->wqe_size = 2;
2883 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2884 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2885 						   wr, bad_wr);
2886 		rwqe->length = cpu_to_le32(length);
2887 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2888 		qp->prev_wqe_size = rwqe->wqe_size;
2889 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2890 		break;
2891 	case IB_WR_RDMA_READ_WITH_INV:
2892 		DP_ERR(dev,
2893 		       "RDMA READ WITH INVALIDATE not supported\n");
2894 		*bad_wr = wr;
2895 		rc = -EINVAL;
2896 		break;
2897 
2898 	case IB_WR_RDMA_READ:
2899 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
2900 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2901 
2902 		rwqe->wqe_size = 2;
2903 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
2904 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2905 						   wr, bad_wr);
2906 		rwqe->length = cpu_to_le32(length);
2907 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2908 		qp->prev_wqe_size = rwqe->wqe_size;
2909 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2910 		break;
2911 
2912 	case IB_WR_ATOMIC_CMP_AND_SWP:
2913 	case IB_WR_ATOMIC_FETCH_AND_ADD:
2914 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
2915 		awqe1->wqe_size = 4;
2916 
2917 		awqe2 = qed_chain_produce(&qp->sq.pbl);
2918 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
2919 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
2920 
2921 		awqe3 = qed_chain_produce(&qp->sq.pbl);
2922 
2923 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2924 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
2925 			DMA_REGPAIR_LE(awqe3->swap_data,
2926 				       atomic_wr(wr)->compare_add);
2927 		} else {
2928 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
2929 			DMA_REGPAIR_LE(awqe3->swap_data,
2930 				       atomic_wr(wr)->swap);
2931 			DMA_REGPAIR_LE(awqe3->cmp_data,
2932 				       atomic_wr(wr)->compare_add);
2933 		}
2934 
2935 		qedr_prepare_sq_sges(qp, NULL, wr);
2936 
2937 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
2938 		qp->prev_wqe_size = awqe1->wqe_size;
2939 		break;
2940 
2941 	case IB_WR_LOCAL_INV:
2942 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
2943 		iwqe->wqe_size = 1;
2944 
2945 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
2946 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
2947 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
2948 		qp->prev_wqe_size = iwqe->wqe_size;
2949 		break;
2950 	case IB_WR_REG_MR:
2951 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
2952 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
2953 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
2954 		fwqe1->wqe_size = 2;
2955 
2956 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
2957 		if (rc) {
2958 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
2959 			*bad_wr = wr;
2960 			break;
2961 		}
2962 
2963 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
2964 		qp->prev_wqe_size = fwqe1->wqe_size;
2965 		break;
2966 	default:
2967 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
2968 		rc = -EINVAL;
2969 		*bad_wr = wr;
2970 		break;
2971 	}
2972 
2973 	if (*bad_wr) {
2974 		u16 value;
2975 
2976 		/* Restore prod to its position before
2977 		 * this WR was processed
2978 		 */
2979 		value = le16_to_cpu(qp->sq.db_data.data.value);
2980 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
2981 
2982 		/* Restore prev_wqe_size */
2983 		qp->prev_wqe_size = wqe->prev_wqe_size;
2984 		rc = -EINVAL;
2985 		DP_ERR(dev, "POST SEND FAILED\n");
2986 	}
2987 
2988 	return rc;
2989 }
2990 
2991 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2992 		   struct ib_send_wr **bad_wr)
2993 {
2994 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2995 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2996 	unsigned long flags;
2997 	int rc = 0;
2998 
2999 	*bad_wr = NULL;
3000 
3001 	if (qp->qp_type == IB_QPT_GSI)
3002 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3003 
3004 	spin_lock_irqsave(&qp->q_lock, flags);
3005 
3006 	if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3007 	    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3008 	    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3009 		spin_unlock_irqrestore(&qp->q_lock, flags);
3010 		*bad_wr = wr;
3011 		DP_DEBUG(dev, QEDR_MSG_CQ,
3012 			 "QP in wrong state! QP icid=0x%x state %d\n",
3013 			 qp->icid, qp->state);
3014 		return -EINVAL;
3015 	}
3016 
3017 	while (wr) {
3018 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3019 		if (rc)
3020 			break;
3021 
3022 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3023 
3024 		qedr_inc_sw_prod(&qp->sq);
3025 
3026 		qp->sq.db_data.data.value++;
3027 
3028 		wr = wr->next;
3029 	}
3030 
3031 	/* Trigger doorbell
3032 	 * If the very first WR failed, the doorbell will be rung in vain.
3033 	 * However, this is not harmful (as long as the producer value is
3034 	 * unchanged). For performance reasons we avoid checking for this
3035 	 * redundant doorbell.
3036 	 */
3037 	wmb();
3038 	writel(qp->sq.db_data.raw, qp->sq.db);
3039 
3040 	/* Make sure write sticks */
3041 	mmiowb();
3042 
3043 	spin_unlock_irqrestore(&qp->q_lock, flags);
3044 
3045 	return rc;
3046 }
3047 
3048 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3049 		   struct ib_recv_wr **bad_wr)
3050 {
3051 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3052 	struct qedr_dev *dev = qp->dev;
3053 	unsigned long flags;
3054 	int status = 0;
3055 
3056 	if (qp->qp_type == IB_QPT_GSI)
3057 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3058 
3059 	spin_lock_irqsave(&qp->q_lock, flags);
3060 
3061 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3062 		spin_unlock_irqrestore(&qp->q_lock, flags);
3063 		*bad_wr = wr;
3064 		return -EINVAL;
3065 	}
3066 
3067 	while (wr) {
3068 		int i;
3069 
3070 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3071 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3072 		    wr->num_sge > qp->rq.max_sges) {
3073 			DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
3074 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3075 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3076 			       qp->rq.max_sges);
3077 			status = -ENOMEM;
3078 			*bad_wr = wr;
3079 			break;
3080 		}
3081 		for (i = 0; i < wr->num_sge; i++) {
3082 			u32 flags = 0;
3083 			struct rdma_rq_sge *rqe =
3084 			    qed_chain_produce(&qp->rq.pbl);
3085 
3086 			/* The first SGE must include the number
3087 			 * of SGEs in the list
3088 			 */
3089 			if (!i)
3090 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3091 					  wr->num_sge);
3092 
3093 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3094 				  wr->sg_list[i].lkey);
3095 
3096 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3097 				   wr->sg_list[i].length, flags);
3098 		}
3099 
3100 		/* Special case of no SGEs: the FW requires between 1 and 4 SGEs,
3101 		 * so in this case we post a single SGE with length zero. This is
3102 		 * because an RDMA write with immediate consumes an RQ entry.
3103 		 */
3104 		if (!wr->num_sge) {
3105 			u32 flags = 0;
3106 			struct rdma_rq_sge *rqe =
3107 			    qed_chain_produce(&qp->rq.pbl);
3108 
3109 			/* The first SGE must include the number
3110 			 * of SGEs in the list
3111 			 */
3112 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3113 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3114 
3115 			RQ_SGE_SET(rqe, 0, 0, flags);
3116 			i = 1;
3117 		}
3118 
3119 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3120 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3121 
3122 		qedr_inc_sw_prod(&qp->rq);
3123 
3124 		/* Flush all the writes before signalling doorbell */
3125 		wmb();
3126 
3127 		qp->rq.db_data.data.value++;
3128 
3129 		writel(qp->rq.db_data.raw, qp->rq.db);
3130 
3131 		/* Make sure write sticks */
3132 		mmiowb();
3133 
3134 		wr = wr->next;
3135 	}
3136 
3137 	spin_unlock_irqrestore(&qp->q_lock, flags);
3138 
3139 	return status;
3140 }
3141 
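/* A CQE still needs processing as long as its toggle bit matches the CQ's
 * current pbl_toggle; the toggle flips each time the CQE chain wraps around.
 */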
3142 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3143 {
3144 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3145 
3146 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3147 		cq->pbl_toggle;
3148 }
3149 
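/* The QP pointer was stored in the CQE's qp_handle (hi/lo pair) when the QP
 * was created; reassemble it into a 64-bit pointer here.
 */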
3150 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3151 {
3152 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3153 	struct qedr_qp *qp;
3154 
3155 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3156 						   resp_cqe->qp_handle.lo,
3157 						   u64);
3158 	return qp;
3159 }
3160 
3161 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3162 {
3163 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3164 
3165 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3166 }
3167 
3168 /* Return latest CQE (needs processing) */
3169 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3170 {
3171 	return cq->latest_cqe;
3172 }
3173 
3174 /* For FMRs we need to increment the FMR-completed counter used by the FMR
3175  * algorithm that determines whether a PBL can be freed or not.
3176  * This must be done whether or not the work request was signaled, so we
3177  * call this function from the condition that checks if a WR should be
3178  * skipped, to make sure we don't miss it (this FMR operation may not
3179  * have been signaled).
3180  */
3181 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3182 {
3183 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3184 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3185 }
3186 
3187 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3188 		       struct qedr_cq *cq, int num_entries,
3189 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3190 		       int force)
3191 {
3192 	u16 cnt = 0;
3193 
3194 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3195 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3196 			qedr_chk_if_fmr(qp);
3197 			/* skip WC */
3198 			goto next_cqe;
3199 		}
3200 
3201 		/* fill WC */
3202 		wc->status = status;
3203 		wc->vendor_err = 0;
3204 		wc->wc_flags = 0;
3205 		wc->src_qp = qp->id;
3206 		wc->qp = &qp->ibqp;
3207 
3208 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3209 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3210 
3211 		switch (wc->opcode) {
3212 		case IB_WC_RDMA_WRITE:
3213 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3214 			break;
3215 		case IB_WC_COMP_SWAP:
3216 		case IB_WC_FETCH_ADD:
3217 			wc->byte_len = 8;
3218 			break;
3219 		case IB_WC_REG_MR:
3220 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3221 			break;
3222 		case IB_WC_RDMA_READ:
3223 		case IB_WC_SEND:
3224 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3225 			break;
3226 		default:
3227 			break;
3228 		}
3229 
3230 		num_entries--;
3231 		wc++;
3232 		cnt++;
3233 next_cqe:
3234 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3235 			qed_chain_consume(&qp->sq.pbl);
3236 		qedr_inc_sw_cons(&qp->sq);
3237 	}
3238 
3239 	return cnt;
3240 }
3241 
3242 static int qedr_poll_cq_req(struct qedr_dev *dev,
3243 			    struct qedr_qp *qp, struct qedr_cq *cq,
3244 			    int num_entries, struct ib_wc *wc,
3245 			    struct rdma_cqe_requester *req)
3246 {
3247 	int cnt = 0;
3248 
3249 	switch (req->status) {
3250 	case RDMA_CQE_REQ_STS_OK:
3251 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3252 				  IB_WC_SUCCESS, 0);
3253 		break;
3254 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3255 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3256 			DP_ERR(dev,
3257 			       "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3258 			       cq->icid, qp->icid);
3259 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3260 				  IB_WC_WR_FLUSH_ERR, 1);
3261 		break;
3262 	default:
3263 		/* Process all WQEs before the consumer */
3264 		qp->state = QED_ROCE_QP_STATE_ERR;
3265 		cnt = process_req(dev, qp, cq, num_entries, wc,
3266 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3267 		wc += cnt;
3268 		/* If we have an extra WC, fill it with the actual error info */
3269 		if (cnt < num_entries) {
3270 			enum ib_wc_status wc_status;
3271 
3272 			switch (req->status) {
3273 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3274 				DP_ERR(dev,
3275 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3276 				       cq->icid, qp->icid);
3277 				wc_status = IB_WC_BAD_RESP_ERR;
3278 				break;
3279 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3280 				DP_ERR(dev,
3281 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3282 				       cq->icid, qp->icid);
3283 				wc_status = IB_WC_LOC_LEN_ERR;
3284 				break;
3285 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3286 				DP_ERR(dev,
3287 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3288 				       cq->icid, qp->icid);
3289 				wc_status = IB_WC_LOC_QP_OP_ERR;
3290 				break;
3291 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3292 				DP_ERR(dev,
3293 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3294 				       cq->icid, qp->icid);
3295 				wc_status = IB_WC_LOC_PROT_ERR;
3296 				break;
3297 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3298 				DP_ERR(dev,
3299 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3300 				       cq->icid, qp->icid);
3301 				wc_status = IB_WC_MW_BIND_ERR;
3302 				break;
3303 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3304 				DP_ERR(dev,
3305 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3306 				       cq->icid, qp->icid);
3307 				wc_status = IB_WC_REM_INV_REQ_ERR;
3308 				break;
3309 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3310 				DP_ERR(dev,
3311 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3312 				       cq->icid, qp->icid);
3313 				wc_status = IB_WC_REM_ACCESS_ERR;
3314 				break;
3315 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3316 				DP_ERR(dev,
3317 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3318 				       cq->icid, qp->icid);
3319 				wc_status = IB_WC_REM_OP_ERR;
3320 				break;
3321 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3322 				DP_ERR(dev,
3323 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3324 				       cq->icid, qp->icid);
3325 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3326 				break;
3327 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3328 				DP_ERR(dev,
3329 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3330 				       cq->icid, qp->icid);
3331 				wc_status = IB_WC_RETRY_EXC_ERR;
3332 				break;
3333 			default:
3334 				DP_ERR(dev,
3335 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3336 				       cq->icid, qp->icid);
3337 				wc_status = IB_WC_GENERAL_ERR;
3338 			}
3339 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3340 					   wc_status, 1);
3341 		}
3342 	}
3343 
3344 	return cnt;
3345 }
3346 
3347 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3348 {
3349 	switch (status) {
3350 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3351 		return IB_WC_LOC_ACCESS_ERR;
3352 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3353 		return IB_WC_LOC_LEN_ERR;
3354 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3355 		return IB_WC_LOC_QP_OP_ERR;
3356 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3357 		return IB_WC_LOC_PROT_ERR;
3358 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3359 		return IB_WC_MW_BIND_ERR;
3360 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3361 		return IB_WC_REM_INV_RD_REQ_ERR;
3362 	case RDMA_CQE_RESP_STS_OK:
3363 		return IB_WC_SUCCESS;
3364 	default:
3365 		return IB_WC_GENERAL_ERR;
3366 	}
3367 }
3368 
3369 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3370 					  struct ib_wc *wc)
3371 {
3372 	wc->status = IB_WC_SUCCESS;
3373 	wc->byte_len = le32_to_cpu(resp->length);
3374 
3375 	if (resp->flags & QEDR_RESP_IMM) {
3376 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3377 		wc->wc_flags |= IB_WC_WITH_IMM;
3378 
3379 		if (resp->flags & QEDR_RESP_RDMA)
3380 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3381 
3382 		if (resp->flags & QEDR_RESP_INV)
3383 			return -EINVAL;
3384 
3385 	} else if (resp->flags & QEDR_RESP_INV) {
3386 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3387 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3388 
3389 		if (resp->flags & QEDR_RESP_RDMA)
3390 			return -EINVAL;
3391 
3392 	} else if (resp->flags & QEDR_RESP_RDMA) {
3393 		return -EINVAL;
3394 	}
3395 
3396 	return 0;
3397 }
3398 
3399 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3400 			       struct qedr_cq *cq, struct ib_wc *wc,
3401 			       struct rdma_cqe_responder *resp, u64 wr_id)
3402 {
3403 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3404 	wc->opcode = IB_WC_RECV;
3405 	wc->wc_flags = 0;
3406 
3407 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3408 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3409 			DP_ERR(dev,
3410 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3411 			       cq, cq->icid, resp->flags);
3412 
3413 	} else {
3414 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3415 		if (wc->status == IB_WC_GENERAL_ERR)
3416 			DP_ERR(dev,
3417 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3418 			       cq, cq->icid, resp->status);
3419 	}
3420 
3421 	/* Fill the rest of the WC */
3422 	wc->vendor_err = 0;
3423 	wc->src_qp = qp->id;
3424 	wc->qp = &qp->ibqp;
3425 	wc->wr_id = wr_id;
3426 }
3427 
3428 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3429 			    struct qedr_cq *cq, struct ib_wc *wc,
3430 			    struct rdma_cqe_responder *resp)
3431 {
3432 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3433 
3434 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3435 
3436 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3437 		qed_chain_consume(&qp->rq.pbl);
3438 	qedr_inc_sw_cons(&qp->rq);
3439 
3440 	return 1;
3441 }
3442 
3443 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3444 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3445 {
3446 	u16 cnt = 0;
3447 
3448 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3449 		/* fill WC */
3450 		wc->status = IB_WC_WR_FLUSH_ERR;
3451 		wc->vendor_err = 0;
3452 		wc->wc_flags = 0;
3453 		wc->src_qp = qp->id;
3454 		wc->byte_len = 0;
3455 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3456 		wc->qp = &qp->ibqp;
3457 		num_entries--;
3458 		wc++;
3459 		cnt++;
3460 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3461 			qed_chain_consume(&qp->rq.pbl);
3462 		qedr_inc_sw_cons(&qp->rq);
3463 	}
3464 
3465 	return cnt;
3466 }
3467 
3468 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3469 				 struct rdma_cqe_responder *resp, int *update)
3470 {
3471 	if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
3472 		consume_cqe(cq);
3473 		*update |= 1;
3474 	}
3475 }
3476 
3477 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3478 			     struct qedr_cq *cq, int num_entries,
3479 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
3480 			     int *update)
3481 {
3482 	int cnt;
3483 
3484 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3485 		cnt = process_resp_flush(qp, cq, num_entries, wc,
3486 					 resp->rq_cons);
3487 		try_consume_resp_cqe(cq, qp, resp, update);
3488 	} else {
3489 		cnt = process_resp_one(dev, qp, cq, wc, resp);
3490 		consume_cqe(cq);
3491 		*update |= 1;
3492 	}
3493 
3494 	return cnt;
3495 }
3496 
3497 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3498 				struct rdma_cqe_requester *req, int *update)
3499 {
3500 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3501 		consume_cqe(cq);
3502 		*update |= 1;
3503 	}
3504 }
3505 
3506 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3507 {
3508 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3509 	struct qedr_cq *cq = get_qedr_cq(ibcq);
3510 	union rdma_cqe *cqe = cq->latest_cqe;
3511 	u32 old_cons, new_cons;
3512 	unsigned long flags;
3513 	int update = 0;
3514 	int done = 0;
3515 
3516 	if (cq->destroyed) {
3517 		DP_ERR(dev,
3518 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3519 		       cq, cq->icid);
3520 		return 0;
3521 	}
3522 
3523 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3524 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3525 
3526 	spin_lock_irqsave(&cq->cq_lock, flags);
3527 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3528 	while (num_entries && is_valid_cqe(cq, cqe)) {
3529 		struct qedr_qp *qp;
3530 		int cnt = 0;
3531 
3532 		/* prevent speculative reads of any field of CQE */
3533 		rmb();
3534 
3535 		qp = cqe_get_qp(cqe);
3536 		if (!qp) {
3537 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3538 			break;
3539 		}
3540 
3541 		wc->qp = &qp->ibqp;
3542 
3543 		switch (cqe_get_type(cqe)) {
3544 		case RDMA_CQE_TYPE_REQUESTER:
3545 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3546 					       &cqe->req);
3547 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
3548 			break;
3549 		case RDMA_CQE_TYPE_RESPONDER_RQ:
3550 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3551 						&cqe->resp, &update);
3552 			break;
3553 		case RDMA_CQE_TYPE_INVALID:
3554 		default:
3555 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
3556 			       cqe_get_type(cqe));
3557 		}
3558 		num_entries -= cnt;
3559 		wc += cnt;
3560 		done += cnt;
3561 
3562 		cqe = get_cqe(cq);
3563 	}
3564 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3565 
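	/* Advance the software CQ consumer count by the number of chain
	 * elements consumed during this poll.
	 */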
3566 	cq->cq_cons += new_cons - old_cons;
3567 
3568 	if (update)
3569 		/* The doorbell notifies about the latest VALID entry,
3570 		 * but the chain already points to the next INVALID one
3571 		 */
3572 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3573 
3574 	spin_unlock_irqrestore(&cq->cq_lock, flags);
3575 	return done;
3576 }
3577 
3578 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3579 		     u8 port_num,
3580 		     const struct ib_wc *in_wc,
3581 		     const struct ib_grh *in_grh,
3582 		     const struct ib_mad_hdr *mad_hdr,
3583 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
3584 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
3585 {
3586 	struct qedr_dev *dev = get_qedr_dev(ibdev);
3587 
3588 	DP_DEBUG(dev, QEDR_MSG_GSI,
3589 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3590 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3591 		 mad_hdr->class_specific, mad_hdr->class_version,
3592 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3593 	return IB_MAD_RESULT_SUCCESS;
3594 }
3595 
3596 int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
3597 			struct ib_port_immutable *immutable)
3598 {
3599 	struct ib_port_attr attr;
3600 	int err;
3601 
3602 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
3603 				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
3604 
3605 	err = ib_query_port(ibdev, port_num, &attr);
3606 	if (err)
3607 		return err;
3608 
3609 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
3610 	immutable->gid_tbl_len = attr.gid_tbl_len;
3611 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
3612 
3613 	return 0;
3614 }
3615