xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 97f61250)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
55 
56 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
57 #define	RDMA_MAX_SGE_PER_SRQ	(4)
58 #define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
59 
60 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
61 
62 enum {
63 	QEDR_USER_MMAP_IO_WC = 0,
64 	QEDR_USER_MMAP_PHYS_PAGE,
65 };
66 
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
68 					size_t len)
69 {
70 	size_t min_len = min_t(size_t, len, udata->outlen);
71 
72 	return ib_copy_to_udata(udata, src, min_len);
73 }
74 
75 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
76 {
77 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
78 		return -EINVAL;
79 
80 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
81 	return 0;
82 }
83 
84 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
85 		      int index, union ib_gid *sgid)
86 {
87 	struct qedr_dev *dev = get_qedr_dev(ibdev);
88 
89 	memset(sgid->raw, 0, sizeof(sgid->raw));
90 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
91 
92 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 		 sgid->global.interface_id, sgid->global.subnet_prefix);
94 
95 	return 0;
96 }
97 
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
99 {
100 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 	struct qedr_device_attr *qattr = &dev->attr;
102 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
103 
104 	srq_attr->srq_limit = srq->srq_limit;
105 	srq_attr->max_wr = qattr->max_srq_wr;
106 	srq_attr->max_sge = qattr->max_sge;
107 
108 	return 0;
109 }
110 
111 int qedr_query_device(struct ib_device *ibdev,
112 		      struct ib_device_attr *attr, struct ib_udata *udata)
113 {
114 	struct qedr_dev *dev = get_qedr_dev(ibdev);
115 	struct qedr_device_attr *qattr = &dev->attr;
116 
117 	if (!dev->rdma_ctx) {
118 		DP_ERR(dev,
119 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
120 		       dev->rdma_ctx);
121 		return -EINVAL;
122 	}
123 
124 	memset(attr, 0, sizeof(*attr));
125 
126 	attr->fw_ver = qattr->fw_ver;
127 	attr->sys_image_guid = qattr->sys_image_guid;
128 	attr->max_mr_size = qattr->max_mr_size;
129 	attr->page_size_cap = qattr->page_size_caps;
130 	attr->vendor_id = qattr->vendor_id;
131 	attr->vendor_part_id = qattr->vendor_part_id;
132 	attr->hw_ver = qattr->hw_ver;
133 	attr->max_qp = qattr->max_qp;
134 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 	    IB_DEVICE_RC_RNR_NAK_GEN |
137 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
138 
139 	attr->max_send_sge = qattr->max_sge;
140 	attr->max_recv_sge = qattr->max_sge;
141 	attr->max_sge_rd = qattr->max_sge;
142 	attr->max_cq = qattr->max_cq;
143 	attr->max_cqe = qattr->max_cqe;
144 	attr->max_mr = qattr->max_mr;
145 	attr->max_mw = qattr->max_mw;
146 	attr->max_pd = qattr->max_pd;
147 	attr->atomic_cap = dev->atomic_cap;
148 	attr->max_fmr = qattr->max_fmr;
149 	attr->max_map_per_fmr = 16;
150 	attr->max_qp_init_rd_atom =
151 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
152 	attr->max_qp_rd_atom =
153 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
154 		attr->max_qp_init_rd_atom);
155 
156 	attr->max_srq = qattr->max_srq;
157 	attr->max_srq_sge = qattr->max_srq_sge;
158 	attr->max_srq_wr = qattr->max_srq_wr;
159 
160 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
161 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
162 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
163 	attr->max_ah = qattr->max_ah;
164 
165 	return 0;
166 }
167 
168 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
169 					    u8 *ib_width)
170 {
171 	switch (speed) {
172 	case 1000:
173 		*ib_speed = IB_SPEED_SDR;
174 		*ib_width = IB_WIDTH_1X;
175 		break;
176 	case 10000:
177 		*ib_speed = IB_SPEED_QDR;
178 		*ib_width = IB_WIDTH_1X;
179 		break;
180 
181 	case 20000:
182 		*ib_speed = IB_SPEED_DDR;
183 		*ib_width = IB_WIDTH_4X;
184 		break;
185 
186 	case 25000:
187 		*ib_speed = IB_SPEED_EDR;
188 		*ib_width = IB_WIDTH_1X;
189 		break;
190 
191 	case 40000:
192 		*ib_speed = IB_SPEED_QDR;
193 		*ib_width = IB_WIDTH_4X;
194 		break;
195 
196 	case 50000:
197 		*ib_speed = IB_SPEED_HDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 100000:
202 		*ib_speed = IB_SPEED_EDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	default:
207 		/* Unsupported */
208 		*ib_speed = IB_SPEED_SDR;
209 		*ib_width = IB_WIDTH_1X;
210 	}
211 }
212 
213 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
214 {
215 	struct qedr_dev *dev;
216 	struct qed_rdma_port *rdma_port;
217 
218 	dev = get_qedr_dev(ibdev);
219 
220 	if (!dev->rdma_ctx) {
221 		DP_ERR(dev, "rdma_ctx is NULL\n");
222 		return -EINVAL;
223 	}
224 
225 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
226 
227 	/* *attr is zeroed by the caller, so avoid zeroing it here */
228 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
229 		attr->state = IB_PORT_ACTIVE;
230 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
231 	} else {
232 		attr->state = IB_PORT_DOWN;
233 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
234 	}
235 	attr->max_mtu = IB_MTU_4096;
236 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
237 	attr->lid = 0;
238 	attr->lmc = 0;
239 	attr->sm_lid = 0;
240 	attr->sm_sl = 0;
241 	attr->ip_gids = true;
242 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
243 		attr->gid_tbl_len = 1;
244 		attr->pkey_tbl_len = 1;
245 	} else {
246 		attr->gid_tbl_len = QEDR_MAX_SGID;
247 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
248 	}
249 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
250 	attr->qkey_viol_cntr = 0;
251 	get_link_speed_and_width(rdma_port->link_speed,
252 				 &attr->active_speed, &attr->active_width);
253 	attr->max_msg_sz = rdma_port->max_msg_size;
254 	attr->max_vl_num = 4;
255 
256 	return 0;
257 }
258 
259 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
260 		     struct ib_port_modify *props)
261 {
262 	return 0;
263 }
264 
265 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
266 {
267 	struct ib_device *ibdev = uctx->device;
268 	int rc;
269 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
270 	struct qedr_alloc_ucontext_resp uresp = {};
271 	struct qedr_alloc_ucontext_req ureq = {};
272 	struct qedr_dev *dev = get_qedr_dev(ibdev);
273 	struct qed_rdma_add_user_out_params oparams;
274 	struct qedr_user_mmap_entry *entry;
275 
276 	if (!udata)
277 		return -EFAULT;
278 
279 	if (udata->inlen) {
280 		rc = ib_copy_from_udata(&ureq, udata,
281 					min(sizeof(ureq), udata->inlen));
282 		if (rc) {
283 			DP_ERR(dev, "Problem copying data from user space\n");
284 			return -EFAULT;
285 		}
286 
287 		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
288 	}
289 
290 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
291 	if (rc) {
292 		DP_ERR(dev,
293 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
294 		       rc);
295 		return rc;
296 	}
297 
298 	ctx->dpi = oparams.dpi;
299 	ctx->dpi_addr = oparams.dpi_addr;
300 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
301 	ctx->dpi_size = oparams.dpi_size;
302 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
303 	if (!entry) {
304 		rc = -ENOMEM;
305 		goto err;
306 	}
307 
308 	entry->io_address = ctx->dpi_phys_addr;
309 	entry->length = ctx->dpi_size;
310 	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
311 	entry->dpi = ctx->dpi;
312 	entry->dev = dev;
313 	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
314 					 ctx->dpi_size);
315 	if (rc) {
316 		kfree(entry);
317 		goto err;
318 	}
319 	ctx->db_mmap_entry = &entry->rdma_entry;
320 
321 	uresp.dpm_enabled = dev->user_dpm_enabled;
322 	uresp.wids_enabled = 1;
323 	uresp.wid_count = oparams.wid_count;
324 	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
325 	uresp.db_size = ctx->dpi_size;
326 	uresp.max_send_wr = dev->attr.max_sqe;
327 	uresp.max_recv_wr = dev->attr.max_rqe;
328 	uresp.max_srq_wr = dev->attr.max_srq_wr;
329 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
330 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
331 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
332 	uresp.max_cqes = QEDR_MAX_CQES;
333 
334 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
335 	if (rc)
336 		goto err;
337 
338 	ctx->dev = dev;
339 
340 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
341 		 &ctx->ibucontext);
342 	return 0;
343 
344 err:
345 	if (!ctx->db_mmap_entry)
346 		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
347 	else
348 		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
349 
350 	return rc;
351 }
352 
353 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
354 {
355 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
356 
357 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
358 		 uctx);
359 
360 	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
361 }
362 
363 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
364 {
365 	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
366 	struct qedr_dev *dev = entry->dev;
367 
368 	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
369 		free_page((unsigned long)entry->address);
370 	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
371 		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
372 
373 	kfree(entry);
374 }
375 
376 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
377 {
378 	struct ib_device *dev = ucontext->device;
379 	size_t length = vma->vm_end - vma->vm_start;
380 	struct rdma_user_mmap_entry *rdma_entry;
381 	struct qedr_user_mmap_entry *entry;
382 	int rc = 0;
383 	u64 pfn;
384 
385 	ibdev_dbg(dev,
386 		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
387 		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
388 
389 	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
390 	if (!rdma_entry) {
391 		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
392 			  vma->vm_pgoff);
393 		return -EINVAL;
394 	}
395 	entry = get_qedr_mmap_entry(rdma_entry);
396 	ibdev_dbg(dev,
397 		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
398 		  entry->io_address, length, entry->mmap_flag);
399 
400 	switch (entry->mmap_flag) {
401 	case QEDR_USER_MMAP_IO_WC:
402 		pfn = entry->io_address >> PAGE_SHIFT;
403 		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
404 				       pgprot_writecombine(vma->vm_page_prot),
405 				       rdma_entry);
406 		break;
407 	case QEDR_USER_MMAP_PHYS_PAGE:
408 		rc = vm_insert_page(vma, vma->vm_start,
409 				    virt_to_page(entry->address));
410 		break;
411 	default:
412 		rc = -EINVAL;
413 	}
414 
415 	if (rc)
416 		ibdev_dbg(dev,
417 			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
418 			  entry->io_address, length, entry->mmap_flag, rc);
419 
420 	rdma_user_mmap_entry_put(rdma_entry);
421 	return rc;
422 }
423 
424 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
425 {
426 	struct ib_device *ibdev = ibpd->device;
427 	struct qedr_dev *dev = get_qedr_dev(ibdev);
428 	struct qedr_pd *pd = get_qedr_pd(ibpd);
429 	u16 pd_id;
430 	int rc;
431 
432 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
433 		 udata ? "User Lib" : "Kernel");
434 
435 	if (!dev->rdma_ctx) {
436 		DP_ERR(dev, "invalid RDMA context\n");
437 		return -EINVAL;
438 	}
439 
440 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
441 	if (rc)
442 		return rc;
443 
444 	pd->pd_id = pd_id;
445 
446 	if (udata) {
447 		struct qedr_alloc_pd_uresp uresp = {
448 			.pd_id = pd_id,
449 		};
450 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
451 			udata, struct qedr_ucontext, ibucontext);
452 
453 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
454 		if (rc) {
455 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
456 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
457 			return rc;
458 		}
459 
460 		pd->uctx = context;
461 		pd->uctx->pd = pd;
462 	}
463 
464 	return 0;
465 }
466 
467 void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
468 {
469 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
470 	struct qedr_pd *pd = get_qedr_pd(ibpd);
471 
472 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
473 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
474 }
475 
476 static void qedr_free_pbl(struct qedr_dev *dev,
477 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
478 {
479 	struct pci_dev *pdev = dev->pdev;
480 	int i;
481 
482 	for (i = 0; i < pbl_info->num_pbls; i++) {
483 		if (!pbl[i].va)
484 			continue;
485 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
486 				  pbl[i].va, pbl[i].pa);
487 	}
488 
489 	kfree(pbl);
490 }
491 
492 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
493 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
494 
495 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
496 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
497 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
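/*
 * Worked example of the capacity macros above: with sizeof(u64) == 8, a
 * 64 KB PBL page holds 8192 PBEs, so a two-layer PBL can address up to
 * 8192 * 8192 = 67,108,864 pages.
 */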
498 
499 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
500 					   struct qedr_pbl_info *pbl_info,
501 					   gfp_t flags)
502 {
503 	struct pci_dev *pdev = dev->pdev;
504 	struct qedr_pbl *pbl_table;
505 	dma_addr_t *pbl_main_tbl;
506 	dma_addr_t pa;
507 	void *va;
508 	int i;
509 
510 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
511 	if (!pbl_table)
512 		return ERR_PTR(-ENOMEM);
513 
514 	for (i = 0; i < pbl_info->num_pbls; i++) {
515 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
516 					flags);
517 		if (!va)
518 			goto err;
519 
520 		pbl_table[i].va = va;
521 		pbl_table[i].pa = pa;
522 	}
523 
524 	/* Two-layer PBLs: if we have more than one PBL, we need to initialize
525 	 * the first one with physical pointers to all of the rest.
526 	 */
527 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
528 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
529 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
530 
531 	return pbl_table;
532 
533 err:
534 	for (i--; i >= 0; i--)
535 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
536 				  pbl_table[i].va, pbl_table[i].pa);
537 
538 	qedr_free_pbl(dev, pbl_info, pbl_table);
539 
540 	return ERR_PTR(-ENOMEM);
541 }
542 
543 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
544 				struct qedr_pbl_info *pbl_info,
545 				u32 num_pbes, int two_layer_capable)
546 {
547 	u32 pbl_capacity;
548 	u32 pbl_size;
549 	u32 num_pbls;
550 
551 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
552 		if (num_pbes > MAX_PBES_TWO_LAYER) {
553 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
554 			       num_pbes);
555 			return -EINVAL;
556 		}
557 
558 		/* calculate required pbl page size */
559 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
560 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
561 			       NUM_PBES_ON_PAGE(pbl_size);
562 
563 		while (pbl_capacity < num_pbes) {
564 			pbl_size *= 2;
565 			pbl_capacity = pbl_size / sizeof(u64);
566 			pbl_capacity = pbl_capacity * pbl_capacity;
567 		}
568 
569 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
570 		num_pbls++;	/* One for layer 0 (which points to the PBLs) */
571 		pbl_info->two_layered = true;
572 	} else {
573 		/* One layered PBL */
574 		num_pbls = 1;
575 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
576 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
577 		pbl_info->two_layered = false;
578 	}
579 
580 	pbl_info->num_pbls = num_pbls;
581 	pbl_info->pbl_size = pbl_size;
582 	pbl_info->num_pbes = num_pbes;
583 
584 	DP_DEBUG(dev, QEDR_MSG_MR,
585 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
586 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
587 
588 	return 0;
589 }
590 
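/*
 * Walk the umem's DMA-mapped scatterlist and write each FW-sized page
 * address into the PBL entries, advancing to the next PBL page whenever
 * the current one fills up. For a two-layered PBL the walk starts at
 * pbl[1], since pbl[0] only holds pointers to the data PBLs.
 */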
591 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
592 			       struct qedr_pbl *pbl,
593 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
594 {
595 	int pbe_cnt, total_num_pbes = 0;
596 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
597 	struct qedr_pbl *pbl_tbl;
598 	struct sg_dma_page_iter sg_iter;
599 	struct regpair *pbe;
600 	u64 pg_addr;
601 
602 	if (!pbl_info->num_pbes)
603 		return;
604 
605 	/* If we have a two-layered PBL, the first PBL points to the rest
606 	 * of the PBLs and the first entry lies in the second PBL of the table.
607 	 */
608 	if (pbl_info->two_layered)
609 		pbl_tbl = &pbl[1];
610 	else
611 		pbl_tbl = pbl;
612 
613 	pbe = (struct regpair *)pbl_tbl->va;
614 	if (!pbe) {
615 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
616 		return;
617 	}
618 
619 	pbe_cnt = 0;
620 
621 	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
622 
623 	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
624 		pg_addr = sg_page_iter_dma_address(&sg_iter);
625 		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
626 			pbe->lo = cpu_to_le32(pg_addr);
627 			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
628 
629 			pg_addr += BIT(pg_shift);
630 			pbe_cnt++;
631 			total_num_pbes++;
632 			pbe++;
633 
634 			if (total_num_pbes == pbl_info->num_pbes)
635 				return;
636 
637 			/* If the given PBL is full of PBEs,
638 			 * move to the next PBL.
639 			 */
640 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
641 				pbl_tbl++;
642 				pbe = (struct regpair *)pbl_tbl->va;
643 				pbe_cnt = 0;
644 			}
645 
646 			fw_pg_cnt++;
647 		}
648 	}
649 }
650 
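/*
 * Register a doorbell address/data pair with the qed doorbell recovery
 * mechanism. A NULL db_data means the user library predates doorbell
 * recovery, so registration is skipped.
 */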
651 static int qedr_db_recovery_add(struct qedr_dev *dev,
652 				void __iomem *db_addr,
653 				void *db_data,
654 				enum qed_db_rec_width db_width,
655 				enum qed_db_rec_space db_space)
656 {
657 	if (!db_data) {
658 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
659 		return 0;
660 	}
661 
662 	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
663 						 db_width, db_space);
664 }
665 
666 static void qedr_db_recovery_del(struct qedr_dev *dev,
667 				 void __iomem *db_addr,
668 				 void *db_data)
669 {
670 	if (!db_data) {
671 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
672 		return;
673 	}
674 
675 	/* Ignore return code as there is not much we can do about it. Error
676 	 * log will be printed inside.
677 	 */
678 	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
679 }
680 
681 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
682 			      struct qedr_cq *cq, struct ib_udata *udata,
683 			      u32 db_offset)
684 {
685 	struct qedr_create_cq_uresp uresp;
686 	int rc;
687 
688 	memset(&uresp, 0, sizeof(uresp));
689 
690 	uresp.db_offset = db_offset;
691 	uresp.icid = cq->icid;
692 	uresp.db_rec_addr = rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
693 
694 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
695 	if (rc)
696 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
697 
698 	return rc;
699 }
700 
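/*
 * Advance latest_cqe to the next CQE in the PBL chain, flipping the
 * toggle bit each time the chain wraps past its last element.
 */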
701 static void consume_cqe(struct qedr_cq *cq)
702 {
703 	if (cq->latest_cqe == cq->toggle_cqe)
704 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
705 
706 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
707 }
708 
709 static inline int qedr_align_cq_entries(int entries)
710 {
711 	u64 size, aligned_size;
712 
713 	/* We allocate an extra entry that we don't report to the FW. */
714 	size = (entries + 1) * QEDR_CQE_SIZE;
715 	aligned_size = ALIGN(size, PAGE_SIZE);
716 
717 	return aligned_size / QEDR_CQE_SIZE;
718 }
719 
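/*
 * Allocate a zeroed page for user-space doorbell recovery data and expose
 * it through an rdma_user_mmap entry. Skipped for queues that do not use
 * doorbells (SRQ) and for user libraries that did not request recovery.
 */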
720 static int qedr_init_user_db_rec(struct ib_udata *udata,
721 				 struct qedr_dev *dev, struct qedr_userq *q,
722 				 bool requires_db_rec)
723 {
724 	struct qedr_ucontext *uctx =
725 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
726 					  ibucontext);
727 	struct qedr_user_mmap_entry *entry;
728 	int rc;
729 
730 	/* Abort for a non-doorbell user queue (SRQ) or a non-supporting lib */
731 	if (requires_db_rec == 0 || !uctx->db_rec)
732 		return 0;
733 
734 	/* Allocate a page for doorbell recovery, add to mmap */
735 	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
736 	if (!q->db_rec_data) {
737 		DP_ERR(dev, "get_zeroed_page failed\n");
738 		return -ENOMEM;
739 	}
740 
741 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
742 	if (!entry)
743 		goto err_free_db_data;
744 
745 	entry->address = q->db_rec_data;
746 	entry->length = PAGE_SIZE;
747 	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
748 	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
749 					 &entry->rdma_entry,
750 					 PAGE_SIZE);
751 	if (rc)
752 		goto err_free_entry;
753 
754 	q->db_mmap_entry = &entry->rdma_entry;
755 
756 	return 0;
757 
758 err_free_entry:
759 	kfree(entry);
760 
761 err_free_db_data:
762 	free_page((unsigned long)q->db_rec_data);
763 	q->db_rec_data = NULL;
764 	return -ENOMEM;
765 }
766 
767 static inline int qedr_init_user_queue(struct ib_udata *udata,
768 				       struct qedr_dev *dev,
769 				       struct qedr_userq *q, u64 buf_addr,
770 				       size_t buf_len, bool requires_db_rec,
771 				       int access, int dmasync,
772 				       int alloc_and_init)
773 {
774 	u32 fw_pages;
775 	int rc;
776 
777 	q->buf_addr = buf_addr;
778 	q->buf_len = buf_len;
779 	q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync);
780 	if (IS_ERR(q->umem)) {
781 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
782 		       PTR_ERR(q->umem));
783 		return PTR_ERR(q->umem);
784 	}
785 
786 	fw_pages = ib_umem_page_count(q->umem) <<
787 	    (PAGE_SHIFT - FW_PAGE_SHIFT);
788 
789 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
790 	if (rc)
791 		goto err0;
792 
793 	if (alloc_and_init) {
794 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
795 		if (IS_ERR(q->pbl_tbl)) {
796 			rc = PTR_ERR(q->pbl_tbl);
797 			goto err0;
798 		}
799 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
800 				   FW_PAGE_SHIFT);
801 	} else {
802 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
803 		if (!q->pbl_tbl) {
804 			rc = -ENOMEM;
805 			goto err0;
806 		}
807 	}
808 
809 	/* mmap the user address used to store doorbell data for recovery */
810 	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
811 
812 err0:
813 	ib_umem_release(q->umem);
814 	q->umem = NULL;
815 
816 	return rc;
817 }
818 
819 static inline void qedr_init_cq_params(struct qedr_cq *cq,
820 				       struct qedr_ucontext *ctx,
821 				       struct qedr_dev *dev, int vector,
822 				       int chain_entries, int page_cnt,
823 				       u64 pbl_ptr,
824 				       struct qed_rdma_create_cq_in_params
825 				       *params)
826 {
827 	memset(params, 0, sizeof(*params));
828 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
829 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
830 	params->cnq_id = vector;
831 	params->cq_size = chain_entries - 1;
832 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
833 	params->pbl_num_pages = page_cnt;
834 	params->pbl_ptr = pbl_ptr;
835 	params->pbl_two_level = 0;
836 }
837 
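/* Update the CQ doorbell data (consumer index and aggregation flags) and
 * ring the doorbell with a single 64-bit write.
 */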
838 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
839 {
840 	cq->db.data.agg_flags = flags;
841 	cq->db.data.value = cpu_to_le32(cons);
842 	writeq(cq->db.raw, cq->db_addr);
843 }
844 
845 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
846 {
847 	struct qedr_cq *cq = get_qedr_cq(ibcq);
848 	unsigned long sflags;
849 	struct qedr_dev *dev;
850 
851 	dev = get_qedr_dev(ibcq->device);
852 
853 	if (cq->destroyed) {
854 		DP_ERR(dev,
855 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
856 		       cq, cq->icid);
857 		return -EINVAL;
858 	}
859 
860 
861 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
862 		return 0;
863 
864 	spin_lock_irqsave(&cq->cq_lock, sflags);
865 
866 	cq->arm_flags = 0;
867 
868 	if (flags & IB_CQ_SOLICITED)
869 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
870 
871 	if (flags & IB_CQ_NEXT_COMP)
872 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
873 
874 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
875 
876 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
877 
878 	return 0;
879 }
880 
881 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
882 		   struct ib_udata *udata)
883 {
884 	struct ib_device *ibdev = ibcq->device;
885 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
886 		udata, struct qedr_ucontext, ibucontext);
887 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
888 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
889 	struct qedr_dev *dev = get_qedr_dev(ibdev);
890 	struct qed_rdma_create_cq_in_params params;
891 	struct qedr_create_cq_ureq ureq = {};
892 	int vector = attr->comp_vector;
893 	int entries = attr->cqe;
894 	struct qedr_cq *cq = get_qedr_cq(ibcq);
895 	int chain_entries;
896 	u32 db_offset;
897 	int page_cnt;
898 	u64 pbl_ptr;
899 	u16 icid;
900 	int rc;
901 
902 	DP_DEBUG(dev, QEDR_MSG_INIT,
903 		 "create_cq: called from %s. entries=%d, vector=%d\n",
904 		 udata ? "User Lib" : "Kernel", entries, vector);
905 
906 	if (entries > QEDR_MAX_CQES) {
907 		DP_ERR(dev,
908 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
909 		       entries, QEDR_MAX_CQES);
910 		return -EINVAL;
911 	}
912 
913 	chain_entries = qedr_align_cq_entries(entries);
914 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
915 
916 	/* calc db offset. user will add DPI base, kernel will add db addr */
917 	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
918 
919 	if (udata) {
920 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
921 							 udata->inlen))) {
922 			DP_ERR(dev,
923 			       "create cq: problem copying data from user space\n");
924 			goto err0;
925 		}
926 
927 		if (!ureq.len) {
928 			DP_ERR(dev,
929 			       "create cq: cannot create a cq with 0 entries\n");
930 			goto err0;
931 		}
932 
933 		cq->cq_type = QEDR_CQ_TYPE_USER;
934 
935 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
936 					  ureq.len, true,
937 					  IB_ACCESS_LOCAL_WRITE,
938 					  1, 1);
939 		if (rc)
940 			goto err0;
941 
942 		pbl_ptr = cq->q.pbl_tbl->pa;
943 		page_cnt = cq->q.pbl_info.num_pbes;
944 
945 		cq->ibcq.cqe = chain_entries;
946 		cq->q.db_addr = ctx->dpi_addr + db_offset;
947 	} else {
948 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
949 
950 		rc = dev->ops->common->chain_alloc(dev->cdev,
951 						   QED_CHAIN_USE_TO_CONSUME,
952 						   QED_CHAIN_MODE_PBL,
953 						   QED_CHAIN_CNT_TYPE_U32,
954 						   chain_entries,
955 						   sizeof(union rdma_cqe),
956 						   &cq->pbl, NULL);
957 		if (rc)
958 			goto err0;
959 
960 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
961 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
962 		cq->ibcq.cqe = cq->pbl.capacity;
963 	}
964 
965 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
966 			    pbl_ptr, &params);
967 
968 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
969 	if (rc)
970 		goto err1;
971 
972 	cq->icid = icid;
973 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
974 	spin_lock_init(&cq->cq_lock);
975 
976 	if (udata) {
977 		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
978 		if (rc)
979 			goto err2;
980 
981 		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
982 					  &cq->q.db_rec_data->db_data,
983 					  DB_REC_WIDTH_64B,
984 					  DB_REC_USER);
985 		if (rc)
986 			goto err2;
987 
988 	} else {
989 		/* Generate doorbell address. */
990 		cq->db.data.icid = cq->icid;
991 		cq->db_addr = dev->db_addr + db_offset;
992 		cq->db.data.params = DB_AGG_CMD_SET <<
993 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
994 
995 		/* point to the very last element; once we pass it we will toggle */
996 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
997 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
998 		cq->latest_cqe = NULL;
999 		consume_cqe(cq);
1000 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1001 
1002 		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1003 					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
1004 		if (rc)
1005 			goto err2;
1006 	}
1007 
1008 	DP_DEBUG(dev, QEDR_MSG_CQ,
1009 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1010 		 cq->icid, cq, params.cq_size);
1011 
1012 	return 0;
1013 
1014 err2:
1015 	destroy_iparams.icid = cq->icid;
1016 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1017 				  &destroy_oparams);
1018 err1:
1019 	if (udata) {
1020 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1021 		ib_umem_release(cq->q.umem);
1022 		if (ctx)
1023 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1024 	} else {
1025 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1026 	}
1027 err0:
1028 	return -EINVAL;
1029 }
1030 
1031 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1032 {
1033 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1034 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1035 
1036 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1037 
1038 	return 0;
1039 }
1040 
1041 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1042 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1043 
1044 void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1045 {
1046 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1047 	struct qed_rdma_destroy_cq_out_params oparams;
1048 	struct qed_rdma_destroy_cq_in_params iparams;
1049 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1050 	int iter;
1051 
1052 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1053 
1054 	cq->destroyed = 1;
1055 
1056 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1057 	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1058 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1059 		return;
1060 	}
1061 
1062 	iparams.icid = cq->icid;
1063 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1064 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1065 
1066 	if (udata) {
1067 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1068 		ib_umem_release(cq->q.umem);
1069 
1070 		if (cq->q.db_rec_data) {
1071 			qedr_db_recovery_del(dev, cq->q.db_addr,
1072 					     &cq->q.db_rec_data->db_data);
1073 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1074 		}
1075 	} else {
1076 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1077 	}
1078 
1079 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1080 	 * wait until all CNQ interrupts, if any, are received. This will always
1081 	 * happen and will always happen very fast. If not, then a serious error
1082 	 * has occurred. That is why we can use a long delay.
1083 	 * We spin for a short time so we don't lose time on context switching
1084 	 * in case all the completions are handled in that span. Otherwise
1085 	 * we sleep for a while and check again. Since the CNQ may be
1086 	 * associated with (only) the current CPU we use msleep to allow the
1087 	 * current CPU to be freed.
1088 	 * The CNQ notification counter is incremented in qedr_irq_handler().
1089 	 */
1090 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1091 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1092 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1093 		iter--;
1094 	}
1095 
1096 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1097 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1098 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1099 		iter--;
1100 	}
1101 
1102 	/* Note that we don't need to have explicit code to wait for the
1103 	 * completion of the event handler because it is invoked from the EQ.
1104 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1105 	 * be certain that there's no event handler in progress.
1106 	 */
1107 }
1108 
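/*
 * Translate the AH attribute's source GID into firmware modify-QP
 * parameters: copy the SGID/DGID (or the embedded IPv4 addresses for
 * RoCEv2 over IPv4), select the RoCE mode from the GID's network type
 * and fetch the VLAN ID from the GID's lower-layer fields.
 */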
1109 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1110 					  struct ib_qp_attr *attr,
1111 					  int attr_mask,
1112 					  struct qed_rdma_modify_qp_in_params
1113 					  *qp_params)
1114 {
1115 	const struct ib_gid_attr *gid_attr;
1116 	enum rdma_network_type nw_type;
1117 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1118 	u32 ipv4_addr;
1119 	int ret;
1120 	int i;
1121 
1122 	gid_attr = grh->sgid_attr;
1123 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1124 	if (ret)
1125 		return ret;
1126 
1127 	nw_type = rdma_gid_attr_network_type(gid_attr);
1128 	switch (nw_type) {
1129 	case RDMA_NETWORK_IPV6:
1130 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1131 		       sizeof(qp_params->sgid));
1132 		memcpy(&qp_params->dgid.bytes[0],
1133 		       &grh->dgid,
1134 		       sizeof(qp_params->dgid));
1135 		qp_params->roce_mode = ROCE_V2_IPV6;
1136 		SET_FIELD(qp_params->modify_flags,
1137 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1138 		break;
1139 	case RDMA_NETWORK_IB:
1140 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1141 		       sizeof(qp_params->sgid));
1142 		memcpy(&qp_params->dgid.bytes[0],
1143 		       &grh->dgid,
1144 		       sizeof(qp_params->dgid));
1145 		qp_params->roce_mode = ROCE_V1;
1146 		break;
1147 	case RDMA_NETWORK_IPV4:
1148 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1149 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1150 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1151 		qp_params->sgid.ipv4_addr = ipv4_addr;
1152 		ipv4_addr =
1153 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1154 		qp_params->dgid.ipv4_addr = ipv4_addr;
1155 		SET_FIELD(qp_params->modify_flags,
1156 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1157 		qp_params->roce_mode = ROCE_V2_IPV4;
1158 		break;
1159 	}
1160 
1161 	for (i = 0; i < 4; i++) {
1162 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1163 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1164 	}
1165 
1166 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1167 		qp_params->vlan_id = 0;
1168 
1169 	return 0;
1170 }
1171 
1172 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1173 			       struct ib_qp_init_attr *attrs,
1174 			       struct ib_udata *udata)
1175 {
1176 	struct qedr_device_attr *qattr = &dev->attr;
1177 
1178 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1179 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1180 		DP_DEBUG(dev, QEDR_MSG_QP,
1181 			 "create qp: unsupported qp type=0x%x requested\n",
1182 			 attrs->qp_type);
1183 		return -EINVAL;
1184 	}
1185 
1186 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1187 		DP_ERR(dev,
1188 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1189 		       attrs->cap.max_send_wr, qattr->max_sqe);
1190 		return -EINVAL;
1191 	}
1192 
1193 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1194 		DP_ERR(dev,
1195 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1196 		       attrs->cap.max_inline_data, qattr->max_inline);
1197 		return -EINVAL;
1198 	}
1199 
1200 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1201 		DP_ERR(dev,
1202 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1203 		       attrs->cap.max_send_sge, qattr->max_sge);
1204 		return -EINVAL;
1205 	}
1206 
1207 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1208 		DP_ERR(dev,
1209 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1210 		       attrs->cap.max_recv_sge, qattr->max_sge);
1211 		return -EINVAL;
1212 	}
1213 
1214 	/* Unprivileged user space cannot create special QP */
1215 	if (udata && attrs->qp_type == IB_QPT_GSI) {
1216 		DP_ERR(dev,
1217 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1218 		       attrs->qp_type);
1219 		return -EINVAL;
1220 	}
1221 
1222 	return 0;
1223 }
1224 
1225 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1226 			       struct qedr_srq *srq, struct ib_udata *udata)
1227 {
1228 	struct qedr_create_srq_uresp uresp = {};
1229 	int rc;
1230 
1231 	uresp.srq_id = srq->srq_id;
1232 
1233 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1234 	if (rc)
1235 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1236 
1237 	return rc;
1238 }
1239 
1240 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1241 			      struct qedr_create_qp_uresp *uresp,
1242 			      struct qedr_qp *qp)
1243 {
1244 	/* iWARP requires two doorbells per RQ. */
1245 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1246 		uresp->rq_db_offset =
1247 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1248 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1249 	} else {
1250 		uresp->rq_db_offset =
1251 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1252 	}
1253 
1254 	uresp->rq_icid = qp->icid;
1255 	uresp->rq_db_rec_addr = rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
1256 }
1257 
1258 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1259 			       struct qedr_create_qp_uresp *uresp,
1260 			       struct qedr_qp *qp)
1261 {
1262 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1263 
1264 	/* iWARP uses the same cid for rq and sq */
1265 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1266 		uresp->sq_icid = qp->icid;
1267 	else
1268 		uresp->sq_icid = qp->icid + 1;
1269 
1270 	uresp->sq_db_rec_addr =
1271 		rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
1272 }
1273 
1274 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1275 			      struct qedr_qp *qp, struct ib_udata *udata,
1276 			      struct qedr_create_qp_uresp *uresp)
1277 {
1278 	int rc;
1279 
1280 	memset(uresp, 0, sizeof(*uresp));
1281 	qedr_copy_sq_uresp(dev, uresp, qp);
1282 	qedr_copy_rq_uresp(dev, uresp, qp);
1283 
1284 	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1285 	uresp->qp_id = qp->qp_id;
1286 
1287 	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1288 	if (rc)
1289 		DP_ERR(dev,
1290 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1291 		       qp->icid);
1292 
1293 	return rc;
1294 }
1295 
1296 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1297 				      struct qedr_qp *qp,
1298 				      struct qedr_pd *pd,
1299 				      struct ib_qp_init_attr *attrs)
1300 {
1301 	spin_lock_init(&qp->q_lock);
1302 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1303 		kref_init(&qp->refcnt);
1304 		init_completion(&qp->iwarp_cm_comp);
1305 	}
1306 	qp->pd = pd;
1307 	qp->qp_type = attrs->qp_type;
1308 	qp->max_inline_data = attrs->cap.max_inline_data;
1309 	qp->sq.max_sges = attrs->cap.max_send_sge;
1310 	qp->state = QED_ROCE_QP_STATE_RESET;
1311 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1312 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1313 	qp->dev = dev;
1314 
1315 	if (attrs->srq) {
1316 		qp->srq = get_qedr_srq(attrs->srq);
1317 	} else {
1318 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1319 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1320 		DP_DEBUG(dev, QEDR_MSG_QP,
1321 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1322 			 qp->rq.max_sges, qp->rq_cq->icid);
1323 	}
1324 
1325 	DP_DEBUG(dev, QEDR_MSG_QP,
1326 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1327 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1328 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1329 	DP_DEBUG(dev, QEDR_MSG_QP,
1330 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1331 		 qp->sq.max_sges, qp->sq_cq->icid);
1332 }
1333 
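/*
 * Set up kernel doorbell addresses for a RoCE QP: the SQ doorbell uses
 * icid + 1 and the RQ doorbell (when no SRQ is attached) uses icid. Both
 * are registered with doorbell recovery; the SQ entry is rolled back if
 * the RQ registration fails.
 */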
1334 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1335 {
1336 	int rc;
1337 
1338 	qp->sq.db = dev->db_addr +
1339 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1340 	qp->sq.db_data.data.icid = qp->icid + 1;
1341 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1342 				  &qp->sq.db_data,
1343 				  DB_REC_WIDTH_32B,
1344 				  DB_REC_KERNEL);
1345 	if (rc)
1346 		return rc;
1347 
1348 	if (!qp->srq) {
1349 		qp->rq.db = dev->db_addr +
1350 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1351 		qp->rq.db_data.data.icid = qp->icid;
1352 
1353 		rc = qedr_db_recovery_add(dev, qp->rq.db,
1354 					  &qp->rq.db_data,
1355 					  DB_REC_WIDTH_32B,
1356 					  DB_REC_KERNEL);
1357 		if (rc)
1358 			qedr_db_recovery_del(dev, qp->sq.db,
1359 					     &qp->sq.db_data);
1360 	}
1361 
1362 	return rc;
1363 }
1364 
1365 static int qedr_check_srq_params(struct qedr_dev *dev,
1366 				 struct ib_srq_init_attr *attrs,
1367 				 struct ib_udata *udata)
1368 {
1369 	struct qedr_device_attr *qattr = &dev->attr;
1370 
1371 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1372 		DP_ERR(dev,
1373 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1374 		       attrs->attr.max_wr, qattr->max_srq_wr);
1375 		return -EINVAL;
1376 	}
1377 
1378 	if (attrs->attr.max_sge > qattr->max_sge) {
1379 		DP_ERR(dev,
1380 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1381 		       attrs->attr.max_sge, qattr->max_sge);
1382 		return -EINVAL;
1383 	}
1384 
1385 	return 0;
1386 }
1387 
1388 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1389 {
1390 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1391 	ib_umem_release(srq->usrq.umem);
1392 	ib_umem_release(srq->prod_umem);
1393 }
1394 
1395 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1396 {
1397 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1398 	struct qedr_dev *dev = srq->dev;
1399 
1400 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1401 
1402 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1403 			  hw_srq->virt_prod_pair_addr,
1404 			  hw_srq->phy_prod_pair_addr);
1405 }
1406 
1407 static int qedr_init_srq_user_params(struct ib_udata *udata,
1408 				     struct qedr_srq *srq,
1409 				     struct qedr_create_srq_ureq *ureq,
1410 				     int access, int dmasync)
1411 {
1412 	struct scatterlist *sg;
1413 	int rc;
1414 
1415 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1416 				  ureq->srq_len, false, access, dmasync, 1);
1417 	if (rc)
1418 		return rc;
1419 
1420 	srq->prod_umem =
1421 		ib_umem_get(udata, ureq->prod_pair_addr,
1422 			    sizeof(struct rdma_srq_producers), access, dmasync);
1423 	if (IS_ERR(srq->prod_umem)) {
1424 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1425 		ib_umem_release(srq->usrq.umem);
1426 		DP_ERR(srq->dev,
1427 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1428 		       PTR_ERR(srq->prod_umem));
1429 		return PTR_ERR(srq->prod_umem);
1430 	}
1431 
1432 	sg = srq->prod_umem->sg_head.sgl;
1433 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1434 
1435 	return 0;
1436 }
1437 
1438 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1439 					struct qedr_dev *dev,
1440 					struct ib_srq_init_attr *init_attr)
1441 {
1442 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1443 	dma_addr_t phy_prod_pair_addr;
1444 	u32 num_elems;
1445 	void *va;
1446 	int rc;
1447 
1448 	va = dma_alloc_coherent(&dev->pdev->dev,
1449 				sizeof(struct rdma_srq_producers),
1450 				&phy_prod_pair_addr, GFP_KERNEL);
1451 	if (!va) {
1452 		DP_ERR(dev,
1453 		       "create srq: failed to allocate dma memory for producer\n");
1454 		return -ENOMEM;
1455 	}
1456 
1457 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1458 	hw_srq->virt_prod_pair_addr = va;
1459 
1460 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1461 	rc = dev->ops->common->chain_alloc(dev->cdev,
1462 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1463 					   QED_CHAIN_MODE_PBL,
1464 					   QED_CHAIN_CNT_TYPE_U32,
1465 					   num_elems,
1466 					   QEDR_SRQ_WQE_ELEM_SIZE,
1467 					   &hw_srq->pbl, NULL);
1468 	if (rc)
1469 		goto err0;
1470 
1471 	hw_srq->num_elems = num_elems;
1472 
1473 	return 0;
1474 
1475 err0:
1476 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1477 			  va, phy_prod_pair_addr);
1478 	return rc;
1479 }
1480 
1481 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1482 		    struct ib_udata *udata)
1483 {
1484 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1485 	struct qed_rdma_create_srq_in_params in_params = {};
1486 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1487 	struct qed_rdma_create_srq_out_params out_params;
1488 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1489 	struct qedr_create_srq_ureq ureq = {};
1490 	u64 pbl_base_addr, phy_prod_pair_addr;
1491 	struct qedr_srq_hwq_info *hw_srq;
1492 	u32 page_cnt, page_size;
1493 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1494 	int rc = 0;
1495 
1496 	DP_DEBUG(dev, QEDR_MSG_QP,
1497 		 "create SRQ called from %s (pd %p)\n",
1498 		 (udata) ? "User lib" : "kernel", pd);
1499 
1500 	rc = qedr_check_srq_params(dev, init_attr, udata);
1501 	if (rc)
1502 		return -EINVAL;
1503 
1504 	srq->dev = dev;
1505 	hw_srq = &srq->hw_srq;
1506 	spin_lock_init(&srq->lock);
1507 
1508 	hw_srq->max_wr = init_attr->attr.max_wr;
1509 	hw_srq->max_sges = init_attr->attr.max_sge;
1510 
1511 	if (udata) {
1512 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1513 							 udata->inlen))) {
1514 			DP_ERR(dev,
1515 			       "create srq: problem copying data from user space\n");
1516 			goto err0;
1517 		}
1518 
1519 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0);
1520 		if (rc)
1521 			goto err0;
1522 
1523 		page_cnt = srq->usrq.pbl_info.num_pbes;
1524 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1525 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1526 		page_size = PAGE_SIZE;
1527 	} else {
1528 		struct qed_chain *pbl;
1529 
1530 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1531 		if (rc)
1532 			goto err0;
1533 
1534 		pbl = &hw_srq->pbl;
1535 		page_cnt = qed_chain_get_page_cnt(pbl);
1536 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1537 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1538 		page_size = QED_CHAIN_PAGE_SIZE;
1539 	}
1540 
1541 	in_params.pd_id = pd->pd_id;
1542 	in_params.pbl_base_addr = pbl_base_addr;
1543 	in_params.prod_pair_addr = phy_prod_pair_addr;
1544 	in_params.num_pages = page_cnt;
1545 	in_params.page_size = page_size;
1546 
1547 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1548 	if (rc)
1549 		goto err1;
1550 
1551 	srq->srq_id = out_params.srq_id;
1552 
1553 	if (udata) {
1554 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1555 		if (rc)
1556 			goto err2;
1557 	}
1558 
1559 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1560 	if (rc)
1561 		goto err2;
1562 
1563 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1564 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1565 	return 0;
1566 
1567 err2:
1568 	destroy_in_params.srq_id = srq->srq_id;
1569 
1570 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1571 err1:
1572 	if (udata)
1573 		qedr_free_srq_user_params(srq);
1574 	else
1575 		qedr_free_srq_kernel_params(srq);
1576 err0:
1577 	return -EFAULT;
1578 }
1579 
1580 void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1581 {
1582 	struct qed_rdma_destroy_srq_in_params in_params = {};
1583 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1584 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1585 
1586 	xa_erase_irq(&dev->srqs, srq->srq_id);
1587 	in_params.srq_id = srq->srq_id;
1588 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1589 
1590 	if (ibsrq->uobject)
1591 		qedr_free_srq_user_params(srq);
1592 	else
1593 		qedr_free_srq_kernel_params(srq);
1594 
1595 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1596 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1597 		 srq->srq_id);
1598 }
1599 
1600 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1601 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1602 {
1603 	struct qed_rdma_modify_srq_in_params in_params = {};
1604 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1605 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1606 	int rc;
1607 
1608 	if (attr_mask & IB_SRQ_MAX_WR) {
1609 		DP_ERR(dev,
1610 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1611 		       attr_mask, srq);
1612 		return -EINVAL;
1613 	}
1614 
1615 	if (attr_mask & IB_SRQ_LIMIT) {
1616 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1617 			DP_ERR(dev,
1618 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1619 			       attr->srq_limit, srq->hw_srq.max_wr);
1620 			return -EINVAL;
1621 		}
1622 
1623 		in_params.srq_id = srq->srq_id;
1624 		in_params.wqe_limit = attr->srq_limit;
1625 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1626 		if (rc)
1627 			return rc;
1628 	}
1629 
1630 	srq->srq_limit = attr->srq_limit;
1631 
1632 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1633 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1634 
1635 	return 0;
1636 }
1637 
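/*
 * Fill the qed create-QP parameters common to user and kernel QPs: the
 * QP handle reported in async events, PD/DPI, the SQ/RQ CQ ids and, when
 * an SRQ is attached, the SRQ id.
 */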
1638 static inline void
1639 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1640 			      struct qedr_pd *pd,
1641 			      struct qedr_qp *qp,
1642 			      struct ib_qp_init_attr *attrs,
1643 			      bool fmr_and_reserved_lkey,
1644 			      struct qed_rdma_create_qp_in_params *params)
1645 {
1646 	/* QP handle to be written in an async event */
1647 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1648 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1649 
1650 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1651 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1652 	params->pd = pd->pd_id;
1653 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1654 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1655 	params->stats_queue = 0;
1656 	params->srq_id = 0;
1657 	params->use_srq = false;
1658 
1659 	if (!qp->srq) {
1660 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1661 
1662 	} else {
1663 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1664 		params->srq_id = qp->srq->srq_id;
1665 		params->use_srq = true;
1666 	}
1667 }
1668 
1669 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1670 {
1671 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1672 		 "qp=%p. "
1673 		 "sq_addr=0x%llx, "
1674 		 "sq_len=%zd, "
1675 		 "rq_addr=0x%llx, "
1676 		 "rq_len=%zd"
1677 		 "\n",
1678 		 qp,
1679 		 qp->usq.buf_addr,
1680 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1681 }
1682 
1683 static inline void
1684 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1685 			    struct qedr_qp *qp,
1686 			    struct qed_rdma_create_qp_out_params *out_params)
1687 {
1688 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1689 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1690 
1691 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1692 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1693 	if (!qp->srq) {
1694 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1695 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1696 	}
1697 
1698 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1699 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1700 }
1701 
1702 static void qedr_cleanup_user(struct qedr_dev *dev,
1703 			      struct qedr_ucontext *ctx,
1704 			      struct qedr_qp *qp)
1705 {
1706 	ib_umem_release(qp->usq.umem);
1707 	qp->usq.umem = NULL;
1708 
1709 	ib_umem_release(qp->urq.umem);
1710 	qp->urq.umem = NULL;
1711 
1712 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1713 		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1714 		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1715 	} else {
1716 		kfree(qp->usq.pbl_tbl);
1717 		kfree(qp->urq.pbl_tbl);
1718 	}
1719 
1720 	if (qp->usq.db_rec_data) {
1721 		qedr_db_recovery_del(dev, qp->usq.db_addr,
1722 				     &qp->usq.db_rec_data->db_data);
1723 		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1724 	}
1725 
1726 	if (qp->urq.db_rec_data) {
1727 		qedr_db_recovery_del(dev, qp->urq.db_addr,
1728 				     &qp->urq.db_rec_data->db_data);
1729 		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
1730 	}
1731 }
1732 
1733 static int qedr_create_user_qp(struct qedr_dev *dev,
1734 			       struct qedr_qp *qp,
1735 			       struct ib_pd *ibpd,
1736 			       struct ib_udata *udata,
1737 			       struct ib_qp_init_attr *attrs)
1738 {
1739 	struct qed_rdma_create_qp_in_params in_params;
1740 	struct qed_rdma_create_qp_out_params out_params;
1741 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1742 	struct qedr_create_qp_uresp uresp;
1743 	struct qedr_ucontext *ctx = NULL;
1744 	struct qedr_create_qp_ureq ureq;
1745 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1746 	int rc = -EINVAL;
1747 
1748 	qp->create_type = QEDR_QP_CREATE_USER;
1749 	memset(&ureq, 0, sizeof(ureq));
1750 	rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
1751 	if (rc) {
1752 		DP_ERR(dev, "Problem copying data from user space\n");
1753 		return rc;
1754 	}
1755 
1756 	/* SQ - read access only (0), dma sync not required (0) */
1757 	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1758 				  ureq.sq_len, true, 0, 0,
1759 				  alloc_and_init);
1760 	if (rc)
1761 		return rc;
1762 
1763 	if (!qp->srq) {
1764 		/* RQ - read access only (0), dma sync not required (0) */
1765 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1766 					  ureq.rq_len, true,
1767 					  0, 0, alloc_and_init);
1768 		if (rc)
1769 			return rc;
1770 	}
1771 
1772 	memset(&in_params, 0, sizeof(in_params));
1773 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1774 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1775 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1776 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1777 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1778 	if (!qp->srq) {
1779 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1780 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1781 	}
1782 
1783 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1784 					      &in_params, &out_params);
1785 
1786 	if (!qp->qed_qp) {
1787 		rc = -ENOMEM;
1788 		goto err1;
1789 	}
1790 
1791 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1792 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1793 
1794 	qp->qp_id = out_params.qp_id;
1795 	qp->icid = out_params.icid;
1796 
1797 	rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1798 	if (rc)
1799 		goto err;
1800 
1801 	/* db offset was calculated in copy_qp_uresp, now set in the user q */
1802 	ctx = pd->uctx;
1803 	qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1804 	qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1805 
1806 	rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1807 				  &qp->usq.db_rec_data->db_data,
1808 				  DB_REC_WIDTH_32B,
1809 				  DB_REC_USER);
1810 	if (rc)
1811 		goto err;
1812 
1813 	rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1814 				  &qp->urq.db_rec_data->db_data,
1815 				  DB_REC_WIDTH_32B,
1816 				  DB_REC_USER);
1817 	if (rc)
1818 		goto err;
1819 
1820 	qedr_qp_user_print(dev, qp);
1821 
1822 	return rc;
1823 err:
1824 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1825 	if (rc)
1826 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1827 
1828 err1:
1829 	qedr_cleanup_user(dev, ctx, qp);
1830 	return rc;
1831 }
1832 
1833 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1834 {
1835 	int rc;
1836 
1837 	qp->sq.db = dev->db_addr +
1838 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1839 	qp->sq.db_data.data.icid = qp->icid;
1840 
1841 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1842 				  &qp->sq.db_data,
1843 				  DB_REC_WIDTH_32B,
1844 				  DB_REC_KERNEL);
1845 	if (rc)
1846 		return rc;
1847 
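	/* In addition to the regular RQ producer doorbell, iWARP uses a
	 * second doorbell (iwarp_db2) that posts the
	 * DQ_TCM_IWARP_POST_RQ_CF_CMD flag command to the TCM.
	 */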
1848 	qp->rq.db = dev->db_addr +
1849 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1850 	qp->rq.db_data.data.icid = qp->icid;
1851 	qp->rq.iwarp_db2 = dev->db_addr +
1852 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1853 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1854 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1855 
1856 	rc = qedr_db_recovery_add(dev, qp->rq.db,
1857 				  &qp->rq.db_data,
1858 				  DB_REC_WIDTH_32B,
1859 				  DB_REC_KERNEL);
1860 	return rc;
1861 }
1862 
1863 static int
1864 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1865 			   struct qedr_qp *qp,
1866 			   struct qed_rdma_create_qp_in_params *in_params,
1867 			   u32 n_sq_elems, u32 n_rq_elems)
1868 {
1869 	struct qed_rdma_create_qp_out_params out_params;
1870 	int rc;
1871 
1872 	rc = dev->ops->common->chain_alloc(dev->cdev,
1873 					   QED_CHAIN_USE_TO_PRODUCE,
1874 					   QED_CHAIN_MODE_PBL,
1875 					   QED_CHAIN_CNT_TYPE_U32,
1876 					   n_sq_elems,
1877 					   QEDR_SQE_ELEMENT_SIZE,
1878 					   &qp->sq.pbl, NULL);
1879 
1880 	if (rc)
1881 		return rc;
1882 
1883 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1884 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1885 
1886 	rc = dev->ops->common->chain_alloc(dev->cdev,
1887 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1888 					   QED_CHAIN_MODE_PBL,
1889 					   QED_CHAIN_CNT_TYPE_U32,
1890 					   n_rq_elems,
1891 					   QEDR_RQE_ELEMENT_SIZE,
1892 					   &qp->rq.pbl, NULL);
1893 	if (rc)
1894 		return rc;
1895 
1896 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1897 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1898 
1899 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1900 					      in_params, &out_params);
1901 
1902 	if (!qp->qed_qp)
1903 		return -EINVAL;
1904 
1905 	qp->qp_id = out_params.qp_id;
1906 	qp->icid = out_params.icid;
1907 
1908 	return qedr_set_roce_db_info(dev, qp);
1909 }
1910 
1911 static int
1912 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1913 			    struct qedr_qp *qp,
1914 			    struct qed_rdma_create_qp_in_params *in_params,
1915 			    u32 n_sq_elems, u32 n_rq_elems)
1916 {
1917 	struct qed_rdma_create_qp_out_params out_params;
1918 	struct qed_chain_ext_pbl ext_pbl;
1919 	int rc;
1920 
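	/* Unlike RoCE, the qed QP is created first here: the FW returns the
	 * SQ/RQ PBL addresses in out_params and the chains below are then
	 * allocated on top of them via ext_pbl.
	 */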
1921 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1922 						     QEDR_SQE_ELEMENT_SIZE,
1923 						     QED_CHAIN_MODE_PBL);
1924 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1925 						     QEDR_RQE_ELEMENT_SIZE,
1926 						     QED_CHAIN_MODE_PBL);
1927 
1928 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1929 					      in_params, &out_params);
1930 
1931 	if (!qp->qed_qp)
1932 		return -EINVAL;
1933 
1934 	/* Now we allocate the chain */
1935 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1936 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1937 
1938 	rc = dev->ops->common->chain_alloc(dev->cdev,
1939 					   QED_CHAIN_USE_TO_PRODUCE,
1940 					   QED_CHAIN_MODE_PBL,
1941 					   QED_CHAIN_CNT_TYPE_U32,
1942 					   n_sq_elems,
1943 					   QEDR_SQE_ELEMENT_SIZE,
1944 					   &qp->sq.pbl, &ext_pbl);
1945 
1946 	if (rc)
1947 		goto err;
1948 
1949 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1950 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1951 
1952 	rc = dev->ops->common->chain_alloc(dev->cdev,
1953 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1954 					   QED_CHAIN_MODE_PBL,
1955 					   QED_CHAIN_CNT_TYPE_U32,
1956 					   n_rq_elems,
1957 					   QEDR_RQE_ELEMENT_SIZE,
1958 					   &qp->rq.pbl, &ext_pbl);
1959 
1960 	if (rc)
1961 		goto err;
1962 
1963 	qp->qp_id = out_params.qp_id;
1964 	qp->icid = out_params.icid;
1965 
1966 	return qedr_set_iwarp_db_info(dev, qp);
1967 
1968 err:
1969 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1970 
1971 	return rc;
1972 }
1973 
1974 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1975 {
1976 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1977 	kfree(qp->wqe_wr_id);
1978 
1979 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1980 	kfree(qp->rqe_wr_id);
1981 
1982 	/* GSI qp is not registered with the db mechanism so no need to delete */
1983 	if (qp->qp_type == IB_QPT_GSI)
1984 		return;
1985 
1986 	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
1987 
1988 	if (!qp->srq)
1989 		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
1990 }
1991 
1992 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1993 				 struct qedr_qp *qp,
1994 				 struct ib_pd *ibpd,
1995 				 struct ib_qp_init_attr *attrs)
1996 {
1997 	struct qed_rdma_create_qp_in_params in_params;
1998 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1999 	int rc = -EINVAL;
2000 	u32 n_rq_elems;
2001 	u32 n_sq_elems;
2002 	u32 n_sq_entries;
2003 
2004 	memset(&in_params, 0, sizeof(in_params));
2005 	qp->create_type = QEDR_QP_CREATE_KERNEL;
2006 
2007 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2008 	 * the ring. The ring should allow at least a single WR, even if the
2009 	 * user requested none, due to allocation issues.
2010 	 * We should add an extra WR since the prod and cons indices of
2011 	 * wqe_wr_id are managed in such a way that the WQ is considered full
2012 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
2013 	 * double the number of entries due to an iSER issue that pushes far more
2014 	 * WRs than indicated. If we decline its ib_post_send() then we get
2015 	 * error prints in dmesg that we'd like to avoid.
2016 	 */
2017 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2018 			      dev->attr.max_sqe);
2019 
2020 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2021 				GFP_KERNEL);
2022 	if (!qp->wqe_wr_id) {
2023 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2024 		return -ENOMEM;
2025 	}
2026 
2027 	/* QP handle to be written in CQE */
2028 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2029 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2030 
2031 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2032 	 * the ring. The ring should allow at least a single WR, even if the
2033 	 * user requested none, due to allocation issues.
2034 	 */
2035 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2036 
2037 	/* Allocate driver internal RQ array */
2038 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2039 				GFP_KERNEL);
2040 	if (!qp->rqe_wr_id) {
2041 		DP_ERR(dev,
2042 		       "create qp: failed RQ shadow memory allocation\n");
2043 		kfree(qp->wqe_wr_id);
2044 		return -ENOMEM;
2045 	}
2046 
2047 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2048 
2049 	n_sq_entries = attrs->cap.max_send_wr;
2050 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2051 	n_sq_entries = max_t(u32, n_sq_entries, 1);
2052 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2053 
2054 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2055 
2056 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2057 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2058 						 n_sq_elems, n_rq_elems);
2059 	else
2060 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2061 						n_sq_elems, n_rq_elems);
2062 	if (rc)
2063 		qedr_cleanup_kernel(dev, qp);
2064 
2065 	return rc;
2066 }
2067 
2068 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
2069 			     struct ib_qp_init_attr *attrs,
2070 			     struct ib_udata *udata)
2071 {
2072 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2073 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2074 	struct qedr_qp *qp;
2075 	struct ib_qp *ibqp;
2076 	int rc = 0;
2077 
2078 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2079 		 udata ? "user library" : "kernel", pd);
2080 
2081 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2082 	if (rc)
2083 		return ERR_PTR(rc);
2084 
2085 	DP_DEBUG(dev, QEDR_MSG_QP,
2086 		 "create qp: called from %s, event_handler=%p, pd=%p, sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2087 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2088 		 get_qedr_cq(attrs->send_cq),
2089 		 get_qedr_cq(attrs->send_cq)->icid,
2090 		 get_qedr_cq(attrs->recv_cq),
2091 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2092 
2093 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2094 	if (!qp) {
2095 		DP_ERR(dev, "create qp: failed allocating memory\n");
2096 		return ERR_PTR(-ENOMEM);
2097 	}
2098 
2099 	qedr_set_common_qp_params(dev, qp, pd, attrs);
2100 
2101 	if (attrs->qp_type == IB_QPT_GSI) {
2102 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
2103 		if (IS_ERR(ibqp))
2104 			kfree(qp);
2105 		return ibqp;
2106 	}
2107 
2108 	if (udata)
2109 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2110 	else
2111 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2112 
2113 	if (rc)
2114 		goto err;
2115 
2116 	qp->ibqp.qp_num = qp->qp_id;
2117 
2118 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2119 		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2120 		if (rc)
2121 			goto err;
2122 	}
2123 
2124 	return &qp->ibqp;
2125 
2126 err:
2127 	kfree(qp);
2128 
2129 	return ERR_PTR(-EFAULT);
2130 }
2131 
2132 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2133 {
2134 	switch (qp_state) {
2135 	case QED_ROCE_QP_STATE_RESET:
2136 		return IB_QPS_RESET;
2137 	case QED_ROCE_QP_STATE_INIT:
2138 		return IB_QPS_INIT;
2139 	case QED_ROCE_QP_STATE_RTR:
2140 		return IB_QPS_RTR;
2141 	case QED_ROCE_QP_STATE_RTS:
2142 		return IB_QPS_RTS;
2143 	case QED_ROCE_QP_STATE_SQD:
2144 		return IB_QPS_SQD;
2145 	case QED_ROCE_QP_STATE_ERR:
2146 		return IB_QPS_ERR;
2147 	case QED_ROCE_QP_STATE_SQE:
2148 		return IB_QPS_SQE;
2149 	}
2150 	return IB_QPS_ERR;
2151 }
2152 
2153 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2154 					enum ib_qp_state qp_state)
2155 {
2156 	switch (qp_state) {
2157 	case IB_QPS_RESET:
2158 		return QED_ROCE_QP_STATE_RESET;
2159 	case IB_QPS_INIT:
2160 		return QED_ROCE_QP_STATE_INIT;
2161 	case IB_QPS_RTR:
2162 		return QED_ROCE_QP_STATE_RTR;
2163 	case IB_QPS_RTS:
2164 		return QED_ROCE_QP_STATE_RTS;
2165 	case IB_QPS_SQD:
2166 		return QED_ROCE_QP_STATE_SQD;
2167 	case IB_QPS_ERR:
2168 		return QED_ROCE_QP_STATE_ERR;
2169 	default:
2170 		return QED_ROCE_QP_STATE_ERR;
2171 	}
2172 }
2173 
2174 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2175 {
2176 	qed_chain_reset(&qph->pbl);
2177 	qph->prod = 0;
2178 	qph->cons = 0;
2179 	qph->wqe_cons = 0;
2180 	qph->db_data.data.value = cpu_to_le16(0);
2181 }
2182 
2183 static int qedr_update_qp_state(struct qedr_dev *dev,
2184 				struct qedr_qp *qp,
2185 				enum qed_roce_qp_state cur_state,
2186 				enum qed_roce_qp_state new_state)
2187 {
2188 	int status = 0;
2189 
2190 	if (new_state == cur_state)
2191 		return 0;
2192 
2193 	switch (cur_state) {
2194 	case QED_ROCE_QP_STATE_RESET:
2195 		switch (new_state) {
2196 		case QED_ROCE_QP_STATE_INIT:
2197 			qp->prev_wqe_size = 0;
2198 			qedr_reset_qp_hwq_info(&qp->sq);
2199 			qedr_reset_qp_hwq_info(&qp->rq);
2200 			break;
2201 		default:
2202 			status = -EINVAL;
2203 			break;
2204 		}
2205 		break;
2206 	case QED_ROCE_QP_STATE_INIT:
2207 		switch (new_state) {
2208 		case QED_ROCE_QP_STATE_RTR:
2209 			/* Update doorbell (in case post_recv was
2210 			 * done before move to RTR)
2211 			 */
2212 
2213 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2214 				writel(qp->rq.db_data.raw, qp->rq.db);
2215 			}
2216 			break;
2217 		case QED_ROCE_QP_STATE_ERR:
2218 			break;
2219 		default:
2220 			/* Invalid state change. */
2221 			status = -EINVAL;
2222 			break;
2223 		}
2224 		break;
2225 	case QED_ROCE_QP_STATE_RTR:
2226 		/* RTR->XXX */
2227 		switch (new_state) {
2228 		case QED_ROCE_QP_STATE_RTS:
2229 			break;
2230 		case QED_ROCE_QP_STATE_ERR:
2231 			break;
2232 		default:
2233 			/* Invalid state change. */
2234 			status = -EINVAL;
2235 			break;
2236 		}
2237 		break;
2238 	case QED_ROCE_QP_STATE_RTS:
2239 		/* RTS->XXX */
2240 		switch (new_state) {
2241 		case QED_ROCE_QP_STATE_SQD:
2242 			break;
2243 		case QED_ROCE_QP_STATE_ERR:
2244 			break;
2245 		default:
2246 			/* Invalid state change. */
2247 			status = -EINVAL;
2248 			break;
2249 		}
2250 		break;
2251 	case QED_ROCE_QP_STATE_SQD:
2252 		/* SQD->XXX */
2253 		switch (new_state) {
2254 		case QED_ROCE_QP_STATE_RTS:
2255 		case QED_ROCE_QP_STATE_ERR:
2256 			break;
2257 		default:
2258 			/* Invalid state change. */
2259 			status = -EINVAL;
2260 			break;
2261 		}
2262 		break;
2263 	case QED_ROCE_QP_STATE_ERR:
2264 		/* ERR->XXX */
2265 		switch (new_state) {
2266 		case QED_ROCE_QP_STATE_RESET:
2267 			if ((qp->rq.prod != qp->rq.cons) ||
2268 			    (qp->sq.prod != qp->sq.cons)) {
2269 				DP_NOTICE(dev,
2270 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2271 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2272 					  qp->sq.cons);
2273 				status = -EINVAL;
2274 			}
2275 			break;
2276 		default:
2277 			status = -EINVAL;
2278 			break;
2279 		}
2280 		break;
2281 	default:
2282 		status = -EINVAL;
2283 		break;
2284 	}
2285 
2286 	return status;
2287 }
2288 
2289 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2290 		   int attr_mask, struct ib_udata *udata)
2291 {
2292 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2293 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2294 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2295 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2296 	enum ib_qp_state old_qp_state, new_qp_state;
2297 	enum qed_roce_qp_state cur_state;
2298 	int rc = 0;
2299 
2300 	DP_DEBUG(dev, QEDR_MSG_QP,
2301 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2302 		 attr->qp_state);
2303 
2304 	old_qp_state = qedr_get_ibqp_state(qp->state);
2305 	if (attr_mask & IB_QP_STATE)
2306 		new_qp_state = attr->qp_state;
2307 	else
2308 		new_qp_state = old_qp_state;
2309 
2310 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2311 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2312 					ibqp->qp_type, attr_mask)) {
2313 			DP_ERR(dev,
2314 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2315 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2316 			       attr_mask, qp->qp_id, ibqp->qp_type,
2317 			       old_qp_state, new_qp_state);
2318 			rc = -EINVAL;
2319 			goto err;
2320 		}
2321 	}
2322 
2323 	/* Translate the masks... */
2324 	if (attr_mask & IB_QP_STATE) {
2325 		SET_FIELD(qp_params.modify_flags,
2326 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2327 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2328 	}
2329 
2330 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2331 		qp_params.sqd_async = true;
2332 
2333 	if (attr_mask & IB_QP_PKEY_INDEX) {
2334 		SET_FIELD(qp_params.modify_flags,
2335 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2336 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2337 			rc = -EINVAL;
2338 			goto err;
2339 		}
2340 
2341 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2342 	}
2343 
2344 	if (attr_mask & IB_QP_QKEY)
2345 		qp->qkey = attr->qkey;
2346 
2347 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2348 		SET_FIELD(qp_params.modify_flags,
2349 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2350 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2351 						  IB_ACCESS_REMOTE_READ;
2352 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2353 						   IB_ACCESS_REMOTE_WRITE;
2354 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2355 					       IB_ACCESS_REMOTE_ATOMIC;
2356 	}
2357 
2358 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2359 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2360 			return -EINVAL;
2361 
2362 		if (attr_mask & IB_QP_PATH_MTU) {
2363 			if (attr->path_mtu < IB_MTU_256 ||
2364 			    attr->path_mtu > IB_MTU_4096) {
2365 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2366 				rc = -EINVAL;
2367 				goto err;
2368 			}
2369 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2370 				      ib_mtu_enum_to_int(
2371 					      iboe_get_mtu(dev->ndev->mtu)));
2372 		}
2373 
2374 		if (!qp->mtu) {
2375 			qp->mtu =
2376 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2377 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2378 		}
2379 
2380 		SET_FIELD(qp_params.modify_flags,
2381 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2382 
2383 		qp_params.traffic_class_tos = grh->traffic_class;
2384 		qp_params.flow_label = grh->flow_label;
2385 		qp_params.hop_limit_ttl = grh->hop_limit;
2386 
2387 		qp->sgid_idx = grh->sgid_index;
2388 
2389 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2390 		if (rc) {
2391 			DP_ERR(dev,
2392 			       "modify qp: problems with GID index %d (rc=%d)\n",
2393 			       grh->sgid_index, rc);
2394 			return rc;
2395 		}
2396 
2397 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2398 				   qp_params.remote_mac_addr);
2399 		if (rc)
2400 			return rc;
2401 
2402 		qp_params.use_local_mac = true;
2403 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2404 
2405 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2406 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2407 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2408 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2409 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2410 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2411 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2412 			 qp_params.remote_mac_addr);
2413 
2414 		qp_params.mtu = qp->mtu;
2415 		qp_params.lb_indication = false;
2416 	}
2417 
2418 	if (!qp_params.mtu) {
2419 		/* Stay with current MTU */
2420 		if (qp->mtu)
2421 			qp_params.mtu = qp->mtu;
2422 		else
2423 			qp_params.mtu =
2424 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2425 	}
2426 
2427 	if (attr_mask & IB_QP_TIMEOUT) {
2428 		SET_FIELD(qp_params.modify_flags,
2429 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2430 
2431 		/* The received timeout value is an exponent used like this:
2432 		 *    "12.7.34 LOCAL ACK TIMEOUT
2433 		 *    Value representing the transport (ACK) timeout for use by
2434 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2435 		 * The FW expects timeout in msec so we need to divide the usec
2436 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2437 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2438 		 * A timeout of zero means infinite; the 'max_t' makes sure
2439 		 * that sub-1-msec values will be configured as 1 msec.
2440 		 */
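		/* Example: attr->timeout = 14 means 4.096 * 2^14 usec, i.e.
		 * roughly 67 msec, and the approximation above yields
		 * 1 << (14 - 8) = 64 msec.
		 */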
2441 		if (attr->timeout)
2442 			qp_params.ack_timeout =
2443 					1 << max_t(int, attr->timeout - 8, 0);
2444 		else
2445 			qp_params.ack_timeout = 0;
2446 	}
2447 
2448 	if (attr_mask & IB_QP_RETRY_CNT) {
2449 		SET_FIELD(qp_params.modify_flags,
2450 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2451 		qp_params.retry_cnt = attr->retry_cnt;
2452 	}
2453 
2454 	if (attr_mask & IB_QP_RNR_RETRY) {
2455 		SET_FIELD(qp_params.modify_flags,
2456 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2457 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2458 	}
2459 
2460 	if (attr_mask & IB_QP_RQ_PSN) {
2461 		SET_FIELD(qp_params.modify_flags,
2462 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2463 		qp_params.rq_psn = attr->rq_psn;
2464 		qp->rq_psn = attr->rq_psn;
2465 	}
2466 
2467 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2468 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2469 			rc = -EINVAL;
2470 			DP_ERR(dev,
2471 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2472 			       attr->max_rd_atomic,
2473 			       dev->attr.max_qp_req_rd_atomic_resc);
2474 			goto err;
2475 		}
2476 
2477 		SET_FIELD(qp_params.modify_flags,
2478 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2479 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2480 	}
2481 
2482 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2483 		SET_FIELD(qp_params.modify_flags,
2484 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2485 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2486 	}
2487 
2488 	if (attr_mask & IB_QP_SQ_PSN) {
2489 		SET_FIELD(qp_params.modify_flags,
2490 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2491 		qp_params.sq_psn = attr->sq_psn;
2492 		qp->sq_psn = attr->sq_psn;
2493 	}
2494 
2495 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2496 		if (attr->max_dest_rd_atomic >
2497 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2498 			DP_ERR(dev,
2499 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2500 			       attr->max_dest_rd_atomic,
2501 			       dev->attr.max_qp_resp_rd_atomic_resc);
2502 
2503 			rc = -EINVAL;
2504 			goto err;
2505 		}
2506 
2507 		SET_FIELD(qp_params.modify_flags,
2508 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2509 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2510 	}
2511 
2512 	if (attr_mask & IB_QP_DEST_QPN) {
2513 		SET_FIELD(qp_params.modify_flags,
2514 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2515 
2516 		qp_params.dest_qp = attr->dest_qp_num;
2517 		qp->dest_qp_num = attr->dest_qp_num;
2518 	}
2519 
2520 	cur_state = qp->state;
2521 
2522 	/* Update the QP state before the actual ramrod to prevent a race with
2523 	 * fast path. Modifying the QP state to error will cause the device to
2524 	 * flush the CQEs, and while polling, the flushed CQEs would be seen
2525 	 * as a potential issue if the QP isn't in the error state.
2526 	 */
2527 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2528 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2529 		qp->state = QED_ROCE_QP_STATE_ERR;
2530 
2531 	if (qp->qp_type != IB_QPT_GSI)
2532 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2533 					      qp->qed_qp, &qp_params);
2534 
2535 	if (attr_mask & IB_QP_STATE) {
2536 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2537 			rc = qedr_update_qp_state(dev, qp, cur_state,
2538 						  qp_params.new_state);
2539 		qp->state = qp_params.new_state;
2540 	}
2541 
2542 err:
2543 	return rc;
2544 }
2545 
2546 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2547 {
2548 	int ib_qp_acc_flags = 0;
2549 
2550 	if (params->incoming_rdma_write_en)
2551 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2552 	if (params->incoming_rdma_read_en)
2553 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2554 	if (params->incoming_atomic_en)
2555 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2556 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2557 	return ib_qp_acc_flags;
2558 }
2559 
2560 int qedr_query_qp(struct ib_qp *ibqp,
2561 		  struct ib_qp_attr *qp_attr,
2562 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2563 {
2564 	struct qed_rdma_query_qp_out_params params;
2565 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2566 	struct qedr_dev *dev = qp->dev;
2567 	int rc = 0;
2568 
2569 	memset(&params, 0, sizeof(params));
2570 
2571 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2572 	if (rc)
2573 		goto err;
2574 
2575 	memset(qp_attr, 0, sizeof(*qp_attr));
2576 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2577 
2578 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2579 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2580 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2581 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2582 	qp_attr->rq_psn = params.rq_psn;
2583 	qp_attr->sq_psn = params.sq_psn;
2584 	qp_attr->dest_qp_num = params.dest_qp;
2585 
2586 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2587 
2588 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2589 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2590 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2591 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2592 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2593 	qp_init_attr->cap = qp_attr->cap;
2594 
2595 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2596 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2597 			params.flow_label, qp->sgid_idx,
2598 			params.hop_limit_ttl, params.traffic_class_tos);
2599 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2600 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2601 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2602 	qp_attr->timeout = params.timeout;
2603 	qp_attr->rnr_retry = params.rnr_retry;
2604 	qp_attr->retry_cnt = params.retry_cnt;
2605 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2606 	qp_attr->pkey_index = params.pkey_index;
2607 	qp_attr->port_num = 1;
2608 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2609 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2610 	qp_attr->alt_pkey_index = 0;
2611 	qp_attr->alt_port_num = 0;
2612 	qp_attr->alt_timeout = 0;
2613 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2614 
2615 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2616 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2617 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2618 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2619 
2620 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2621 		 qp_attr->cap.max_inline_data);
2622 
2623 err:
2624 	return rc;
2625 }
2626 
2627 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2628 				  struct ib_udata *udata)
2629 {
2630 	struct qedr_ucontext *ctx =
2631 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2632 					  ibucontext);
2633 	int rc;
2634 
2635 	if (qp->qp_type != IB_QPT_GSI) {
2636 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2637 		if (rc)
2638 			return rc;
2639 	}
2640 
2641 	if (qp->create_type == QEDR_QP_CREATE_USER)
2642 		qedr_cleanup_user(dev, ctx, qp);
2643 	else
2644 		qedr_cleanup_kernel(dev, qp);
2645 
2646 	return 0;
2647 }
2648 
2649 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2650 {
2651 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2652 	struct qedr_dev *dev = qp->dev;
2653 	struct ib_qp_attr attr;
2654 	int attr_mask = 0;
2655 
2656 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2657 		 qp, qp->qp_type);
2658 
2659 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2660 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2661 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2662 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2663 
2664 			attr.qp_state = IB_QPS_ERR;
2665 			attr_mask |= IB_QP_STATE;
2666 
2667 			/* Change the QP state to ERROR */
2668 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2669 		}
2670 	} else {
2671 		/* If connection establishment started, the WAIT_FOR_CONNECT
2672 		 * bit will be on and we need to wait for the establishment
2673 		 * to complete before destroying the qp.
2674 		 */
2675 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2676 				     &qp->iwarp_cm_flags))
2677 			wait_for_completion(&qp->iwarp_cm_comp);
2678 
2679 		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2680 		 * bit will be on, and we need to wait for the disconnect to
2681 		 * complete before continuing. We can use the same completion,
2682 		 * iwarp_cm_comp, since this is the only place that waits for
2683 		 * this completion and it is sequential. In addition,
2684 		 * disconnect can't occur before the connection is fully
2685 		 * established, therefore if WAIT_FOR_DISCONNECT is on it
2686 		 * means WAIT_FOR_CONNECT is also on and the completion for
2687 		 * CONNECT already occurred.
2688 		 */
2689 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2690 				     &qp->iwarp_cm_flags))
2691 			wait_for_completion(&qp->iwarp_cm_comp);
2692 	}
2693 
2694 	if (qp->qp_type == IB_QPT_GSI)
2695 		qedr_destroy_gsi_qp(dev);
2696 
2697 	/* We need to remove the entry from the xarray before we release the
2698 	 * qp_id to avoid a race of the qp_id being reallocated and failing
2699 	 * on xa_insert
2700 	 */
2701 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2702 		xa_erase(&dev->qps, qp->qp_id);
2703 
2704 	qedr_free_qp_resources(dev, qp, udata);
2705 
2706 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2707 		qedr_iw_qp_rem_ref(&qp->ibqp);
2708 
2709 	return 0;
2710 }
2711 
2712 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
2713 		   struct ib_udata *udata)
2714 {
2715 	struct qedr_ah *ah = get_qedr_ah(ibah);
2716 
2717 	rdma_copy_ah_attr(&ah->attr, attr);
2718 
2719 	return 0;
2720 }
2721 
2722 void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2723 {
2724 	struct qedr_ah *ah = get_qedr_ah(ibah);
2725 
2726 	rdma_destroy_ah_attr(&ah->attr);
2727 }
2728 
2729 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2730 {
2731 	struct qedr_pbl *pbl, *tmp;
2732 
2733 	if (info->pbl_table)
2734 		list_add_tail(&info->pbl_table->list_entry,
2735 			      &info->free_pbl_list);
2736 
2737 	if (!list_empty(&info->inuse_pbl_list))
2738 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2739 
2740 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2741 		list_del(&pbl->list_entry);
2742 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2743 	}
2744 }
2745 
2746 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2747 			size_t page_list_len, bool two_layered)
2748 {
2749 	struct qedr_pbl *tmp;
2750 	int rc;
2751 
2752 	INIT_LIST_HEAD(&info->free_pbl_list);
2753 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2754 
2755 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2756 				  page_list_len, two_layered);
2757 	if (rc)
2758 		goto done;
2759 
2760 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2761 	if (IS_ERR(info->pbl_table)) {
2762 		rc = PTR_ERR(info->pbl_table);
2763 		goto done;
2764 	}
2765 
2766 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2767 		 &info->pbl_table->pa);
2768 
2769 	/* In the usual case we use 2 PBLs, so we add one to the free
2770 	 * list and allocate another one
2771 	 */
2772 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2773 	if (IS_ERR(tmp)) {
2774 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2775 		goto done;
2776 	}
2777 
2778 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2779 
2780 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2781 
2782 done:
2783 	if (rc)
2784 		free_mr_info(dev, info);
2785 
2786 	return rc;
2787 }
2788 
2789 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2790 			       u64 usr_addr, int acc, struct ib_udata *udata)
2791 {
2792 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2793 	struct qedr_mr *mr;
2794 	struct qedr_pd *pd;
2795 	int rc = -ENOMEM;
2796 
2797 	pd = get_qedr_pd(ibpd);
2798 	DP_DEBUG(dev, QEDR_MSG_MR,
2799 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2800 		 pd->pd_id, start, len, usr_addr, acc);
2801 
2802 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2803 		return ERR_PTR(-EINVAL);
2804 
2805 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2806 	if (!mr)
2807 		return ERR_PTR(rc);
2808 
2809 	mr->type = QEDR_MR_USER;
2810 
2811 	mr->umem = ib_umem_get(udata, start, len, acc, 0);
2812 	if (IS_ERR(mr->umem)) {
2813 		rc = -EFAULT;
2814 		goto err0;
2815 	}
2816 
2817 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2818 	if (rc)
2819 		goto err1;
2820 
2821 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2822 			   &mr->info.pbl_info, PAGE_SHIFT);
2823 
2824 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2825 	if (rc) {
2826 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2827 		goto err1;
2828 	}
2829 
2830 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2831 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2832 	mr->hw_mr.key = 0;
2833 	mr->hw_mr.pd = pd->pd_id;
2834 	mr->hw_mr.local_read = 1;
2835 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2836 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2837 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2838 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2839 	mr->hw_mr.mw_bind = false;
2840 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2841 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2842 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2843 	mr->hw_mr.page_size_log = PAGE_SHIFT;
2844 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2845 	mr->hw_mr.length = len;
2846 	mr->hw_mr.vaddr = usr_addr;
2847 	mr->hw_mr.zbva = false;
2848 	mr->hw_mr.phy_mr = false;
2849 	mr->hw_mr.dma_mr = false;
2850 
2851 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2852 	if (rc) {
2853 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2854 		goto err2;
2855 	}
2856 
2857 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2858 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2859 	    mr->hw_mr.remote_atomic)
2860 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2861 
2862 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2863 		 mr->ibmr.lkey);
2864 	return &mr->ibmr;
2865 
2866 err2:
2867 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2868 err1:
2869 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2870 err0:
2871 	kfree(mr);
2872 	return ERR_PTR(rc);
2873 }
2874 
2875 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2876 {
2877 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2878 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2879 	int rc = 0;
2880 
2881 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2882 	if (rc)
2883 		return rc;
2884 
2885 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2886 
2887 	if (mr->type != QEDR_MR_DMA)
2888 		free_mr_info(dev, &mr->info);
2889 
2890 	/* It could be user-registered memory. */
2891 	ib_umem_release(mr->umem);
2892 
2893 	kfree(mr);
2894 
2895 	return rc;
2896 }
2897 
2898 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2899 				       int max_page_list_len)
2900 {
2901 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2902 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2903 	struct qedr_mr *mr;
2904 	int rc = -ENOMEM;
2905 
2906 	DP_DEBUG(dev, QEDR_MSG_MR,
2907 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2908 		 max_page_list_len);
2909 
2910 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2911 	if (!mr)
2912 		return ERR_PTR(rc);
2913 
2914 	mr->dev = dev;
2915 	mr->type = QEDR_MR_FRMR;
2916 
2917 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2918 	if (rc)
2919 		goto err0;
2920 
2921 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2922 	if (rc) {
2923 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2924 		goto err0;
2925 	}
2926 
2927 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2928 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2929 	mr->hw_mr.key = 0;
2930 	mr->hw_mr.pd = pd->pd_id;
2931 	mr->hw_mr.local_read = 1;
2932 	mr->hw_mr.local_write = 0;
2933 	mr->hw_mr.remote_read = 0;
2934 	mr->hw_mr.remote_write = 0;
2935 	mr->hw_mr.remote_atomic = 0;
2936 	mr->hw_mr.mw_bind = false;
2937 	mr->hw_mr.pbl_ptr = 0;
2938 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2939 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2940 	mr->hw_mr.fbo = 0;
2941 	mr->hw_mr.length = 0;
2942 	mr->hw_mr.vaddr = 0;
2943 	mr->hw_mr.zbva = false;
2944 	mr->hw_mr.phy_mr = true;
2945 	mr->hw_mr.dma_mr = false;
2946 
2947 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2948 	if (rc) {
2949 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2950 		goto err1;
2951 	}
2952 
2953 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2954 	mr->ibmr.rkey = mr->ibmr.lkey;
2955 
2956 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2957 	return mr;
2958 
2959 err1:
2960 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2961 err0:
2962 	kfree(mr);
2963 	return ERR_PTR(rc);
2964 }
2965 
2966 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
2967 			    u32 max_num_sg, struct ib_udata *udata)
2968 {
2969 	struct qedr_mr *mr;
2970 
2971 	if (mr_type != IB_MR_TYPE_MEM_REG)
2972 		return ERR_PTR(-EINVAL);
2973 
2974 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2975 
2976 	if (IS_ERR(mr))
2977 		return ERR_PTR(-EINVAL);
2978 
2979 	return &mr->ibmr;
2980 }
2981 
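/* ib_sg_to_pages() callback: store the next page address as a little-endian
 * PBE, advancing to the next PBL table page once the current one is full.
 */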
2982 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2983 {
2984 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2985 	struct qedr_pbl *pbl_table;
2986 	struct regpair *pbe;
2987 	u32 pbes_in_page;
2988 
2989 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2990 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2991 		return -ENOMEM;
2992 	}
2993 
2994 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2995 		 mr->npages, addr);
2996 
2997 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2998 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2999 	pbe = (struct regpair *)pbl_table->va;
3000 	pbe +=  mr->npages % pbes_in_page;
3001 	pbe->lo = cpu_to_le32((u32)addr);
3002 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3003 
3004 	mr->npages++;
3005 
3006 	return 0;
3007 }
3008 
3009 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3010 {
3011 	int work = info->completed - info->completed_handled - 1;
3012 
3013 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3014 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3015 		struct qedr_pbl *pbl;
3016 
3017 		/* Free all the page lists that can be freed (all the ones
3018 		 * that were invalidated), under the assumption that if an FMR
3019 		 * completed successfully then any invalidate operation that
3020 		 * preceded it has also completed.
3021 		 */
3022 		pbl = list_first_entry(&info->inuse_pbl_list,
3023 				       struct qedr_pbl, list_entry);
3024 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3025 		info->completed_handled++;
3026 	}
3027 }
3028 
3029 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3030 		   int sg_nents, unsigned int *sg_offset)
3031 {
3032 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3033 
3034 	mr->npages = 0;
3035 
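	/* Recycle page lists of MRs whose invalidation has already completed */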
3036 	handle_completed_mrs(mr->dev, &mr->info);
3037 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3038 }
3039 
3040 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3041 {
3042 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3043 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3044 	struct qedr_mr *mr;
3045 	int rc;
3046 
3047 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3048 	if (!mr)
3049 		return ERR_PTR(-ENOMEM);
3050 
3051 	mr->type = QEDR_MR_DMA;
3052 
3053 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3054 	if (rc) {
3055 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
3056 		goto err1;
3057 	}
3058 
3059 	/* index only, 18 bit long, lkey = itid << 8 | key */
3060 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3061 	mr->hw_mr.pd = pd->pd_id;
3062 	mr->hw_mr.local_read = 1;
3063 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3064 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3065 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3066 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3067 	mr->hw_mr.dma_mr = true;
3068 
3069 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3070 	if (rc) {
3071 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3072 		goto err2;
3073 	}
3074 
3075 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3076 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3077 	    mr->hw_mr.remote_atomic)
3078 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3079 
3080 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3081 	return &mr->ibmr;
3082 
3083 err2:
3084 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3085 err1:
3086 	kfree(mr);
3087 	return ERR_PTR(rc);
3088 }
3089 
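/* The WQ is treated as full when advancing prod would make it equal to cons;
 * one slot is intentionally left unused so that a full ring can be
 * distinguished from an empty one.
 */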
3090 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3091 {
3092 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3093 }
3094 
3095 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3096 {
3097 	int i, len = 0;
3098 
3099 	for (i = 0; i < num_sge; i++)
3100 		len += sg_list[i].length;
3101 
3102 	return len;
3103 }
3104 
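/* Byte-swap each 64-bit word of a WQE element: inline payload is copied in
 * CPU byte order and is swapped to the byte order the HW expects once a
 * segment has been completely filled.
 */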
3105 static void swap_wqe_data64(u64 *p)
3106 {
3107 	int i;
3108 
3109 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3110 		*p = cpu_to_be64(cpu_to_le64(*p));
3111 }
3112 
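/* Copy the payload of an inline WR directly into SQ WQE elements. Data is
 * packed into consecutive ring elements, sizeof(struct rdma_sq_common_wqe)
 * bytes at a time, and *wqe_size is incremented for every element consumed.
 */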
3113 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3114 				       struct qedr_qp *qp, u8 *wqe_size,
3115 				       const struct ib_send_wr *wr,
3116 				       const struct ib_send_wr **bad_wr,
3117 				       u8 *bits, u8 bit)
3118 {
3119 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3120 	char *seg_prt, *wqe;
3121 	int i, seg_siz;
3122 
3123 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3124 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3125 		*bad_wr = wr;
3126 		return 0;
3127 	}
3128 
3129 	if (!data_size)
3130 		return data_size;
3131 
3132 	*bits |= bit;
3133 
3134 	seg_prt = NULL;
3135 	wqe = NULL;
3136 	seg_siz = 0;
3137 
3138 	/* Copy data inline */
3139 	for (i = 0; i < wr->num_sge; i++) {
3140 		u32 len = wr->sg_list[i].length;
3141 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3142 
3143 		while (len > 0) {
3144 			u32 cur;
3145 
3146 			/* New segment required */
3147 			if (!seg_siz) {
3148 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3149 				seg_prt = wqe;
3150 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3151 				(*wqe_size)++;
3152 			}
3153 
3154 			/* Calculate currently allowed length */
3155 			cur = min_t(u32, len, seg_siz);
3156 			memcpy(seg_prt, src, cur);
3157 
3158 			/* Update segment variables */
3159 			seg_prt += cur;
3160 			seg_siz -= cur;
3161 
3162 			/* Update sge variables */
3163 			src += cur;
3164 			len -= cur;
3165 
3166 			/* Swap fully-completed segments */
3167 			if (!seg_siz)
3168 				swap_wqe_data64((u64 *)wqe);
3169 		}
3170 	}
3171 
3172 	/* swap the last, partially filled segment */
3173 	if (seg_siz)
3174 		swap_wqe_data64((u64 *)wqe);
3175 
3176 	return data_size;
3177 }
3178 
3179 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3180 	do {							\
3181 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3182 		(sge)->length = cpu_to_le32(vlength);		\
3183 		(sge)->flags = cpu_to_le32(vflags);		\
3184 	} while (0)
3185 
3186 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3187 	do {							\
3188 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3189 		(hdr)->num_sges = num_sge;			\
3190 	} while (0)
3191 
3192 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3193 	do {							\
3194 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3195 		(sge)->length = cpu_to_le32(vlength);		\
3196 		(sge)->l_key = cpu_to_le32(vlkey);		\
3197 	} while (0)
3198 
3199 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3200 				const struct ib_send_wr *wr)
3201 {
3202 	u32 data_size = 0;
3203 	int i;
3204 
3205 	for (i = 0; i < wr->num_sge; i++) {
3206 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3207 
3208 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3209 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3210 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3211 		data_size += wr->sg_list[i].length;
3212 	}
3213 
3214 	if (wqe_size)
3215 		*wqe_size += wr->num_sge;
3216 
3217 	return data_size;
3218 }
3219 
3220 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3221 				     struct qedr_qp *qp,
3222 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3223 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3224 				     const struct ib_send_wr *wr,
3225 				     const struct ib_send_wr **bad_wr)
3226 {
3227 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3228 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3229 
3230 	if (wr->send_flags & IB_SEND_INLINE &&
3231 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3232 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3233 		u8 flags = 0;
3234 
3235 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3236 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3237 						   bad_wr, &rwqe->flags, flags);
3238 	}
3239 
3240 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3241 }
3242 
3243 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3244 				     struct qedr_qp *qp,
3245 				     struct rdma_sq_send_wqe_1st *swqe,
3246 				     struct rdma_sq_send_wqe_2st *swqe2,
3247 				     const struct ib_send_wr *wr,
3248 				     const struct ib_send_wr **bad_wr)
3249 {
3250 	memset(swqe2, 0, sizeof(*swqe2));
3251 	if (wr->send_flags & IB_SEND_INLINE) {
3252 		u8 flags = 0;
3253 
3254 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3255 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3256 						   bad_wr, &swqe->flags, flags);
3257 	}
3258 
3259 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3260 }
3261 
3262 static int qedr_prepare_reg(struct qedr_qp *qp,
3263 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3264 			    const struct ib_reg_wr *wr)
3265 {
3266 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3267 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3268 
3269 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3270 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3271 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3272 	fwqe1->l_key = wr->key;
3273 
3274 	fwqe2->access_ctrl = 0;
3275 
3276 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3277 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3278 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3279 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3280 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3281 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3282 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3283 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3284 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3285 	fwqe2->fmr_ctrl = 0;
3286 
3287 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3288 		   ilog2(mr->ibmr.page_size) - 12);
3289 
3290 	fwqe2->length_hi = 0;
3291 	fwqe2->length_lo = mr->ibmr.length;
3292 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3293 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3294 
3295 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3296 
3297 	return 0;
3298 }
3299 
3300 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3301 {
3302 	switch (opcode) {
3303 	case IB_WR_RDMA_WRITE:
3304 	case IB_WR_RDMA_WRITE_WITH_IMM:
3305 		return IB_WC_RDMA_WRITE;
3306 	case IB_WR_SEND_WITH_IMM:
3307 	case IB_WR_SEND:
3308 	case IB_WR_SEND_WITH_INV:
3309 		return IB_WC_SEND;
3310 	case IB_WR_RDMA_READ:
3311 	case IB_WR_RDMA_READ_WITH_INV:
3312 		return IB_WC_RDMA_READ;
3313 	case IB_WR_ATOMIC_CMP_AND_SWP:
3314 		return IB_WC_COMP_SWAP;
3315 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3316 		return IB_WC_FETCH_ADD;
3317 	case IB_WR_REG_MR:
3318 		return IB_WC_REG_MR;
3319 	case IB_WR_LOCAL_INV:
3320 		return IB_WC_LOCAL_INV;
3321 	default:
3322 		return IB_WC_SEND;
3323 	}
3324 }
3325 
3326 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3327 				      const struct ib_send_wr *wr)
3328 {
3329 	int wq_is_full, err_wr, pbl_is_full;
3330 	struct qedr_dev *dev = qp->dev;
3331 
3332 	/* prevent SQ overflow and/or processing of a bad WR */
3333 	err_wr = wr->num_sge > qp->sq.max_sges;
3334 	wq_is_full = qedr_wq_is_full(&qp->sq);
3335 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3336 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3337 	if (wq_is_full || err_wr || pbl_is_full) {
3338 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3339 			DP_ERR(dev,
3340 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3341 			       qp);
3342 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3343 		}
3344 
3345 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3346 			DP_ERR(dev,
3347 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3348 			       qp);
3349 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3350 		}
3351 
3352 		if (pbl_is_full &&
3353 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3354 			DP_ERR(dev,
3355 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3356 			       qp);
3357 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3358 		}
3359 		return false;
3360 	}
3361 	return true;
3362 }
3363 
3364 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3365 			    const struct ib_send_wr **bad_wr)
3366 {
3367 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3368 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3369 	struct rdma_sq_atomic_wqe_1st *awqe1;
3370 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3371 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3372 	struct rdma_sq_send_wqe_2st *swqe2;
3373 	struct rdma_sq_local_inv_wqe *iwqe;
3374 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3375 	struct rdma_sq_send_wqe_1st *swqe;
3376 	struct rdma_sq_rdma_wqe_1st *rwqe;
3377 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3378 	struct rdma_sq_common_wqe *wqe;
3379 	u32 length;
3380 	int rc = 0;
3381 	bool comp;
3382 
3383 	if (!qedr_can_post_send(qp, wr)) {
3384 		*bad_wr = wr;
3385 		return -ENOMEM;
3386 	}
3387 
3388 	wqe = qed_chain_produce(&qp->sq.pbl);
3389 	qp->wqe_wr_id[qp->sq.prod].signaled =
3390 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3391 
3392 	wqe->flags = 0;
3393 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3394 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3395 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3396 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3397 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3398 		   !!(wr->send_flags & IB_SEND_FENCE));
3399 	wqe->prev_wqe_size = qp->prev_wqe_size;
3400 
3401 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3402 
3403 	switch (wr->opcode) {
3404 	case IB_WR_SEND_WITH_IMM:
3405 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3406 			rc = -EINVAL;
3407 			*bad_wr = wr;
3408 			break;
3409 		}
3410 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3411 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3412 		swqe->wqe_size = 2;
3413 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3414 
3415 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3416 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3417 						   wr, bad_wr);
3418 		swqe->length = cpu_to_le32(length);
3419 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3420 		qp->prev_wqe_size = swqe->wqe_size;
3421 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3422 		break;
3423 	case IB_WR_SEND:
3424 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3425 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3426 
3427 		swqe->wqe_size = 2;
3428 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3429 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3430 						   wr, bad_wr);
3431 		swqe->length = cpu_to_le32(length);
3432 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3433 		qp->prev_wqe_size = swqe->wqe_size;
3434 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3435 		break;
3436 	case IB_WR_SEND_WITH_INV:
3437 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3438 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3439 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3440 		swqe->wqe_size = 2;
3441 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3442 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3443 						   wr, bad_wr);
3444 		swqe->length = cpu_to_le32(length);
3445 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3446 		qp->prev_wqe_size = swqe->wqe_size;
3447 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3448 		break;
3449 
3450 	case IB_WR_RDMA_WRITE_WITH_IMM:
3451 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3452 			rc = -EINVAL;
3453 			*bad_wr = wr;
3454 			break;
3455 		}
3456 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3457 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3458 
3459 		rwqe->wqe_size = 2;
3460 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3461 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3462 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3463 						   wr, bad_wr);
3464 		rwqe->length = cpu_to_le32(length);
3465 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3466 		qp->prev_wqe_size = rwqe->wqe_size;
3467 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3468 		break;
3469 	case IB_WR_RDMA_WRITE:
3470 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3471 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3472 
3473 		rwqe->wqe_size = 2;
3474 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3475 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3476 						   wr, bad_wr);
3477 		rwqe->length = cpu_to_le32(length);
3478 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3479 		qp->prev_wqe_size = rwqe->wqe_size;
3480 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3481 		break;
3482 	case IB_WR_RDMA_READ_WITH_INV:
3483 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3484 		/* fallthrough -- handling is identical to RDMA READ */
3485 
3486 	case IB_WR_RDMA_READ:
3487 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3488 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3489 
3490 		rwqe->wqe_size = 2;
3491 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3492 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3493 						   wr, bad_wr);
3494 		rwqe->length = cpu_to_le32(length);
3495 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3496 		qp->prev_wqe_size = rwqe->wqe_size;
3497 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3498 		break;
3499 
3500 	case IB_WR_ATOMIC_CMP_AND_SWP:
3501 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3502 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3503 		awqe1->wqe_size = 4;
3504 
3505 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3506 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3507 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3508 
3509 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3510 
3511 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3512 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3513 			DMA_REGPAIR_LE(awqe3->swap_data,
3514 				       atomic_wr(wr)->compare_add);
3515 		} else {
3516 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3517 			DMA_REGPAIR_LE(awqe3->swap_data,
3518 				       atomic_wr(wr)->swap);
3519 			DMA_REGPAIR_LE(awqe3->cmp_data,
3520 				       atomic_wr(wr)->compare_add);
3521 		}
3522 
3523 		qedr_prepare_sq_sges(qp, NULL, wr);
3524 
3525 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3526 		qp->prev_wqe_size = awqe1->wqe_size;
3527 		break;
3528 
3529 	case IB_WR_LOCAL_INV:
3530 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3531 		iwqe->wqe_size = 1;
3532 
3533 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3534 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3535 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3536 		qp->prev_wqe_size = iwqe->wqe_size;
3537 		break;
3538 	case IB_WR_REG_MR:
3539 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3540 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3541 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3542 		fwqe1->wqe_size = 2;
3543 
3544 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3545 		if (rc) {
3546 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3547 			*bad_wr = wr;
3548 			break;
3549 		}
3550 
3551 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3552 		qp->prev_wqe_size = fwqe1->wqe_size;
3553 		break;
3554 	default:
3555 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3556 		rc = -EINVAL;
3557 		*bad_wr = wr;
3558 		break;
3559 	}
3560 
3561 	if (*bad_wr) {
3562 		u16 value;
3563 
3564 		/* Restore prod to its position before
3565 		 * this WR was processed
3566 		 */
3567 		value = le16_to_cpu(qp->sq.db_data.data.value);
3568 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3569 
3570 		/* Restore prev_wqe_size */
3571 		qp->prev_wqe_size = wqe->prev_wqe_size;
3572 		rc = -EINVAL;
3573 		DP_ERR(dev, "POST SEND FAILED\n");
3574 	}
3575 
3576 	return rc;
3577 }
3578 
3579 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3580 		   const struct ib_send_wr **bad_wr)
3581 {
3582 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3583 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3584 	unsigned long flags;
3585 	int rc = 0;
3586 
3587 	*bad_wr = NULL;
3588 
3589 	if (qp->qp_type == IB_QPT_GSI)
3590 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3591 
3592 	spin_lock_irqsave(&qp->q_lock, flags);
3593 
3594 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3595 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3596 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3597 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3598 			spin_unlock_irqrestore(&qp->q_lock, flags);
3599 			*bad_wr = wr;
3600 			DP_DEBUG(dev, QEDR_MSG_CQ,
3601 				 "QP in wrong state! QP icid=0x%x state %d\n",
3602 				 qp->icid, qp->state);
3603 			return -EINVAL;
3604 		}
3605 	}
3606 
3607 	while (wr) {
3608 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3609 		if (rc)
3610 			break;
3611 
3612 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3613 
3614 		qedr_inc_sw_prod(&qp->sq);
3615 
3616 		qp->sq.db_data.data.value++;
3617 
3618 		wr = wr->next;
3619 	}
3620 
3621 	/* Trigger doorbell
3622 	 * If there was a failure in the first WR then it will be triggered in
3623 	 * vain. However, this is not harmful (as long as the producer value is
3624 	 * unchanged). For performance reasons we avoid checking for this
3625 	 * redundant doorbell.
3626 	 *
3627 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3628 	 * soon as we give the doorbell, we could get a completion
3629 	 * for this wr, therefore we need to make sure that the
3630 	 * memory is updated before giving the doorbell.
3631 	 * During qedr_poll_cq, rmb is called before accessing the
3632 	 * cqe. This covers for the smp_rmb as well.
3633 	 */
3634 	smp_wmb();
3635 	writel(qp->sq.db_data.raw, qp->sq.db);
3636 
3637 	spin_unlock_irqrestore(&qp->q_lock, flags);
3638 
3639 	return rc;
3640 }
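
/* A minimal, hypothetical sketch (kept under #if 0, not built) of how a
 * kernel ULP might post a single signaled RDMA write through the core
 * verbs API; on a qedr device this path ends up in qedr_post_send()
 * above.  The example_* name and the address/key parameters are
 * placeholders, not part of this driver.
 */
#if 0
static int example_post_rdma_write(struct ib_qp *qp, u64 local_addr,
				   u32 lkey, u64 remote_addr, u32 rkey,
				   u32 len)
{
	struct ib_sge sge = {
		.addr = local_addr,
		.length = len,
		.lkey = lkey,
	};
	struct ib_rdma_wr wr = {
		.wr = {
			.opcode = IB_WR_RDMA_WRITE,
			.send_flags = IB_SEND_SIGNALED,
			.sg_list = &sge,
			.num_sge = 1,
			.wr_id = 1,	/* reported back in wc->wr_id */
		},
		.remote_addr = remote_addr,
		.rkey = rkey,
	};
	const struct ib_send_wr *bad_wr;

	return ib_post_send(qp, &wr.wr, &bad_wr);
}
#endif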
3641 
3642 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3643 {
3644 	u32 used;
3645 
3646 	/* Calculate the number of used elements from the producer
3647 	 * and consumer counts, then subtract it from the maximum
3648 	 * number of work requests supported to get the elements left.
3649 	 */
3650 	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3651 
3652 	return hw_srq->max_wr - used;
3653 }
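
/* The unsigned subtraction above also stays correct if wr_prod_cnt wraps
 * around before wr_cons_cnt does.  A small sketch (kept under #if 0, not
 * built) with hypothetical values:
 */
#if 0
static void example_srq_elem_left_wraparound(void)
{
	u32 wr_prod_cnt = 3;		/* producer already wrapped past 2^32 */
	u32 wr_cons_cnt = 0xfffffffe;	/* consumer has not wrapped yet */
	u32 max_wr = 1024;
	u32 used = wr_prod_cnt - wr_cons_cnt;	/* 5, despite the wrap */

	pr_info("SRQ elements left: %u\n", max_wr - used);	/* 1019 */
}
#endif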
3654 
3655 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3656 		       const struct ib_recv_wr **bad_wr)
3657 {
3658 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3659 	struct qedr_srq_hwq_info *hw_srq;
3660 	struct qedr_dev *dev = srq->dev;
3661 	struct qed_chain *pbl;
3662 	unsigned long flags;
3663 	int status = 0;
3664 	u32 num_sge;
3665 	u32 offset;
3666 
3667 	spin_lock_irqsave(&srq->lock, flags);
3668 
3669 	hw_srq = &srq->hw_srq;
3670 	pbl = &srq->hw_srq.pbl;
3671 	while (wr) {
3672 		struct rdma_srq_wqe_header *hdr;
3673 		int i;
3674 
3675 		if (!qedr_srq_elem_left(hw_srq) ||
3676 		    wr->num_sge > srq->hw_srq.max_sges) {
3677 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3678 			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3679 			       wr->num_sge, srq->hw_srq.max_sges);
3680 			status = -ENOMEM;
3681 			*bad_wr = wr;
3682 			break;
3683 		}
3684 
3685 		hdr = qed_chain_produce(pbl);
3686 		num_sge = wr->num_sge;
3687 		/* Set number of sge and work request id in header */
3688 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3689 
3690 		srq->hw_srq.wr_prod_cnt++;
3691 		hw_srq->wqe_prod++;
3692 		hw_srq->sge_prod++;
3693 
3694 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3695 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3696 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3697 
3698 		for (i = 0; i < wr->num_sge; i++) {
3699 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3700 
3701 			/* Set SGE length, lkey and address */
3702 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3703 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3704 
3705 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3706 				 "[%d]: len %d key %x addr %x:%x\n",
3707 				 i, srq_sge->length, srq_sge->l_key,
3708 				 srq_sge->addr.hi, srq_sge->addr.lo);
3709 			hw_srq->sge_prod++;
3710 		}
3711 
3712 		/* Flush WQE and SGE information before
3713 		 * updating producer.
3714 		 */
3715 		wmb();
3716 
3717 		/* SRQ producer is 8 bytes. Need to update the SGE producer
3718 		 * index in the first 4 bytes and the full 32-bit WQE
3719 		 * producer in the next 4 bytes.
3720 		 */
3721 		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3722 		offset = offsetof(struct rdma_srq_producers, wqe_prod);
3723 		*(u32 *)((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
3724 			hw_srq->wqe_prod;
3725 
3726 		/* Flush producer after updating it. */
3727 		wmb();
3728 		wr = wr->next;
3729 	}
3730 
3731 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3732 		 qed_chain_get_elem_left(pbl));
3733 	spin_unlock_irqrestore(&srq->lock, flags);
3734 
3735 	return status;
3736 }
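
/* A hypothetical sketch (kept under #if 0, not built) of how a ULP would
 * hand a receive buffer to the SRQ through the core verbs API; it lands
 * in qedr_post_srq_recv() above.  The example_* name and the dma_addr,
 * lkey and len parameters are placeholders.
 */
#if 0
static int example_post_srq_buffer(struct ib_srq *srq, u64 dma_addr,
				   u32 lkey, u32 len)
{
	struct ib_sge sge = {
		.addr = dma_addr,
		.length = len,
		.lkey = lkey,
	};
	struct ib_recv_wr wr = {
		.wr_id = dma_addr,	/* anything that identifies the buffer */
		.sg_list = &sge,
		.num_sge = 1,
	};
	const struct ib_recv_wr *bad_wr;

	return ib_post_srq_recv(srq, &wr, &bad_wr);
}
#endif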
3737 
3738 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3739 		   const struct ib_recv_wr **bad_wr)
3740 {
3741 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3742 	struct qedr_dev *dev = qp->dev;
3743 	unsigned long flags;
3744 	int status = 0;
3745 
3746 	if (qp->qp_type == IB_QPT_GSI)
3747 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3748 
3749 	spin_lock_irqsave(&qp->q_lock, flags);
3750 
3751 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3752 		spin_unlock_irqrestore(&qp->q_lock, flags);
3753 		*bad_wr = wr;
3754 		return -EINVAL;
3755 	}
3756 
3757 	while (wr) {
3758 		int i;
3759 
3760 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3761 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3762 		    wr->num_sge > qp->rq.max_sges) {
3763 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3764 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3765 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3766 			       qp->rq.max_sges);
3767 			status = -ENOMEM;
3768 			*bad_wr = wr;
3769 			break;
3770 		}
3771 		for (i = 0; i < wr->num_sge; i++) {
3772 			u32 flags = 0;
3773 			struct rdma_rq_sge *rqe =
3774 			    qed_chain_produce(&qp->rq.pbl);
3775 
3776 			/* First one must include the number
3777 			 * of SGE in the list
3778 			 */
3779 			if (!i)
3780 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3781 					  wr->num_sge);
3782 
3783 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3784 				  wr->sg_list[i].lkey);
3785 
3786 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3787 				   wr->sg_list[i].length, flags);
3788 		}
3789 
3790 		/* Special case of no SGEs. FW requires between 1-4 SGEs;
3791 		 * in this case we need to post one SGE with length zero. This
3792 		 * is because an RDMA write with immediate consumes an RQ entry.
3793 		 */
3794 		if (!wr->num_sge) {
3795 			u32 flags = 0;
3796 			struct rdma_rq_sge *rqe =
3797 			    qed_chain_produce(&qp->rq.pbl);
3798 
3799 			/* First one must include the number
3800 			 * of SGEs in the list
3801 			 */
3802 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3803 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3804 
3805 			RQ_SGE_SET(rqe, 0, 0, flags);
3806 			i = 1;
3807 		}
3808 
3809 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3810 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3811 
3812 		qedr_inc_sw_prod(&qp->rq);
3813 
3814 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3815 		 * soon as we give the doorbell, we could get a completion
3816 		 * for this wr, therefore we need to make sure that the
3817 		 * memory is updated before giving the doorbell.
3818 		 * During qedr_poll_cq, rmb is called before accessing the
3819 		 * cqe. This covers for the smp_rmb as well.
3820 		 */
3821 		smp_wmb();
3822 
3823 		qp->rq.db_data.data.value++;
3824 
3825 		writel(qp->rq.db_data.raw, qp->rq.db);
3826 
3827 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3828 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3829 		}
3830 
3831 		wr = wr->next;
3832 	}
3833 
3834 	spin_unlock_irqrestore(&qp->q_lock, flags);
3835 
3836 	return status;
3837 }
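
/* A hypothetical sketch (kept under #if 0, not built) of the zero-SGE
 * case handled above: a receive WR with num_sge == 0 is legal and is
 * typically posted to match an incoming RDMA write with immediate, which
 * consumes an RQ entry but carries no receive payload.
 * qedr_post_recv() converts it into a single zero-length SGE.
 */
#if 0
static int example_post_zero_length_recv(struct ib_qp *qp, u64 wr_id)
{
	struct ib_recv_wr wr = {
		.wr_id = wr_id,
		.sg_list = NULL,
		.num_sge = 0,
	};
	const struct ib_recv_wr *bad_wr;

	return ib_post_recv(qp, &wr, &bad_wr);
}
#endif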
3838 
3839 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3840 {
3841 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3842 
3843 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3844 		cq->pbl_toggle;
3845 }
3846 
3847 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3848 {
3849 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3850 	struct qedr_qp *qp;
3851 
3852 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3853 						   resp_cqe->qp_handle.lo,
3854 						   u64);
3855 	return qp;
3856 }
3857 
3858 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3859 {
3860 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3861 
3862 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3863 }
3864 
3865 /* Return latest CQE (needs processing) */
3866 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3867 {
3868 	return cq->latest_cqe;
3869 }
3870 
3871 /* For FMR we need to increase the completed-FMR counter, which the FMR
3872  * algorithm uses to determine whether a PBL can be freed or not.
3873  * We need to do this whether or not the work request was signaled. For
3874  * this purpose we call this function from the condition that checks if a
3875  * WR should be skipped, to make sure we don't miss it (possibly this FMR
3876  * operation was not signaled).
3877  */
3878 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3879 {
3880 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3881 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3882 }
3883 
3884 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3885 		       struct qedr_cq *cq, int num_entries,
3886 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3887 		       int force)
3888 {
3889 	u16 cnt = 0;
3890 
3891 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3892 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3893 			qedr_chk_if_fmr(qp);
3894 			/* skip WC */
3895 			goto next_cqe;
3896 		}
3897 
3898 		/* fill WC */
3899 		wc->status = status;
3900 		wc->vendor_err = 0;
3901 		wc->wc_flags = 0;
3902 		wc->src_qp = qp->id;
3903 		wc->qp = &qp->ibqp;
3904 
3905 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3906 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3907 
3908 		switch (wc->opcode) {
3909 		case IB_WC_RDMA_WRITE:
3910 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3911 			break;
3912 		case IB_WC_COMP_SWAP:
3913 		case IB_WC_FETCH_ADD:
3914 			wc->byte_len = 8;
3915 			break;
3916 		case IB_WC_REG_MR:
3917 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3918 			break;
3919 		case IB_WC_RDMA_READ:
3920 		case IB_WC_SEND:
3921 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3922 			break;
3923 		default:
3924 			break;
3925 		}
3926 
3927 		num_entries--;
3928 		wc++;
3929 		cnt++;
3930 next_cqe:
3931 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3932 			qed_chain_consume(&qp->sq.pbl);
3933 		qedr_inc_sw_cons(&qp->sq);
3934 	}
3935 
3936 	return cnt;
3937 }
3938 
3939 static int qedr_poll_cq_req(struct qedr_dev *dev,
3940 			    struct qedr_qp *qp, struct qedr_cq *cq,
3941 			    int num_entries, struct ib_wc *wc,
3942 			    struct rdma_cqe_requester *req)
3943 {
3944 	int cnt = 0;
3945 
3946 	switch (req->status) {
3947 	case RDMA_CQE_REQ_STS_OK:
3948 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3949 				  IB_WC_SUCCESS, 0);
3950 		break;
3951 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3952 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3953 			DP_DEBUG(dev, QEDR_MSG_CQ,
3954 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3955 				 cq->icid, qp->icid);
3956 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3957 				  IB_WC_WR_FLUSH_ERR, 1);
3958 		break;
3959 	default:
3960 		/* process all WQEs before the consumer */
3961 		qp->state = QED_ROCE_QP_STATE_ERR;
3962 		cnt = process_req(dev, qp, cq, num_entries, wc,
3963 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3964 		wc += cnt;
3965 		/* if we have extra WC fill it with actual error info */
3966 		if (cnt < num_entries) {
3967 			enum ib_wc_status wc_status;
3968 
3969 			switch (req->status) {
3970 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3971 				DP_ERR(dev,
3972 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3973 				       cq->icid, qp->icid);
3974 				wc_status = IB_WC_BAD_RESP_ERR;
3975 				break;
3976 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3977 				DP_ERR(dev,
3978 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3979 				       cq->icid, qp->icid);
3980 				wc_status = IB_WC_LOC_LEN_ERR;
3981 				break;
3982 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3983 				DP_ERR(dev,
3984 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3985 				       cq->icid, qp->icid);
3986 				wc_status = IB_WC_LOC_QP_OP_ERR;
3987 				break;
3988 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3989 				DP_ERR(dev,
3990 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3991 				       cq->icid, qp->icid);
3992 				wc_status = IB_WC_LOC_PROT_ERR;
3993 				break;
3994 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3995 				DP_ERR(dev,
3996 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3997 				       cq->icid, qp->icid);
3998 				wc_status = IB_WC_MW_BIND_ERR;
3999 				break;
4000 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4001 				DP_ERR(dev,
4002 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4003 				       cq->icid, qp->icid);
4004 				wc_status = IB_WC_REM_INV_REQ_ERR;
4005 				break;
4006 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4007 				DP_ERR(dev,
4008 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4009 				       cq->icid, qp->icid);
4010 				wc_status = IB_WC_REM_ACCESS_ERR;
4011 				break;
4012 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4013 				DP_ERR(dev,
4014 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4015 				       cq->icid, qp->icid);
4016 				wc_status = IB_WC_REM_OP_ERR;
4017 				break;
4018 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4019 				DP_ERR(dev,
4020 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4021 				       cq->icid, qp->icid);
4022 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4023 				break;
4024 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4025 				DP_ERR(dev,
4026 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4027 				       cq->icid, qp->icid);
4028 				wc_status = IB_WC_RETRY_EXC_ERR;
4029 				break;
4030 			default:
4031 				DP_ERR(dev,
4032 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4033 				       cq->icid, qp->icid);
4034 				wc_status = IB_WC_GENERAL_ERR;
4035 			}
4036 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4037 					   wc_status, 1);
4038 		}
4039 	}
4040 
4041 	return cnt;
4042 }
4043 
4044 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4045 {
4046 	switch (status) {
4047 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4048 		return IB_WC_LOC_ACCESS_ERR;
4049 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4050 		return IB_WC_LOC_LEN_ERR;
4051 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4052 		return IB_WC_LOC_QP_OP_ERR;
4053 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4054 		return IB_WC_LOC_PROT_ERR;
4055 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4056 		return IB_WC_MW_BIND_ERR;
4057 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4058 		return IB_WC_REM_INV_RD_REQ_ERR;
4059 	case RDMA_CQE_RESP_STS_OK:
4060 		return IB_WC_SUCCESS;
4061 	default:
4062 		return IB_WC_GENERAL_ERR;
4063 	}
4064 }
4065 
4066 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4067 					  struct ib_wc *wc)
4068 {
4069 	wc->status = IB_WC_SUCCESS;
4070 	wc->byte_len = le32_to_cpu(resp->length);
4071 
4072 	if (resp->flags & QEDR_RESP_IMM) {
4073 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4074 		wc->wc_flags |= IB_WC_WITH_IMM;
4075 
4076 		if (resp->flags & QEDR_RESP_RDMA)
4077 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4078 
4079 		if (resp->flags & QEDR_RESP_INV)
4080 			return -EINVAL;
4081 
4082 	} else if (resp->flags & QEDR_RESP_INV) {
4083 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4084 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4085 
4086 		if (resp->flags & QEDR_RESP_RDMA)
4087 			return -EINVAL;
4088 
4089 	} else if (resp->flags & QEDR_RESP_RDMA) {
4090 		return -EINVAL;
4091 	}
4092 
4093 	return 0;
4094 }
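
/* A hypothetical sketch (kept under #if 0, not built) of how a consumer
 * of the completion would use the wc_flags set above to distinguish a
 * plain receive from one carrying immediate data or a remote invalidate.
 */
#if 0
static void example_handle_recv_wc(const struct ib_wc *wc)
{
	if (wc->wc_flags & IB_WC_WITH_IMM)
		pr_info("recv with immediate data 0x%x\n",
			be32_to_cpu(wc->ex.imm_data));
	else if (wc->wc_flags & IB_WC_WITH_INVALIDATE)
		pr_info("recv that invalidated rkey 0x%x\n",
			wc->ex.invalidate_rkey);
	else
		pr_info("plain recv, %u bytes\n", wc->byte_len);
}
#endif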
4095 
4096 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4097 			       struct qedr_cq *cq, struct ib_wc *wc,
4098 			       struct rdma_cqe_responder *resp, u64 wr_id)
4099 {
4100 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4101 	wc->opcode = IB_WC_RECV;
4102 	wc->wc_flags = 0;
4103 
4104 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4105 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4106 			DP_ERR(dev,
4107 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4108 			       cq, cq->icid, resp->flags);
4109 
4110 	} else {
4111 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4112 		if (wc->status == IB_WC_GENERAL_ERR)
4113 			DP_ERR(dev,
4114 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4115 			       cq, cq->icid, resp->status);
4116 	}
4117 
4118 	/* Fill the rest of the WC */
4119 	wc->vendor_err = 0;
4120 	wc->src_qp = qp->id;
4121 	wc->qp = &qp->ibqp;
4122 	wc->wr_id = wr_id;
4123 }
4124 
4125 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4126 				struct qedr_cq *cq, struct ib_wc *wc,
4127 				struct rdma_cqe_responder *resp)
4128 {
4129 	struct qedr_srq *srq = qp->srq;
4130 	u64 wr_id;
4131 
4132 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4133 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4134 
4135 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4136 		wc->status = IB_WC_WR_FLUSH_ERR;
4137 		wc->vendor_err = 0;
4138 		wc->wr_id = wr_id;
4139 		wc->byte_len = 0;
4140 		wc->src_qp = qp->id;
4141 		wc->qp = &qp->ibqp;
4142 		wc->wr_id = wr_id;
4143 	} else {
4144 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4145 	}
4146 	srq->hw_srq.wr_cons_cnt++;
4147 
4148 	return 1;
4149 }
4150 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4151 			    struct qedr_cq *cq, struct ib_wc *wc,
4152 			    struct rdma_cqe_responder *resp)
4153 {
4154 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4155 
4156 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4157 
4158 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4159 		qed_chain_consume(&qp->rq.pbl);
4160 	qedr_inc_sw_cons(&qp->rq);
4161 
4162 	return 1;
4163 }
4164 
4165 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4166 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4167 {
4168 	u16 cnt = 0;
4169 
4170 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4171 		/* fill WC */
4172 		wc->status = IB_WC_WR_FLUSH_ERR;
4173 		wc->vendor_err = 0;
4174 		wc->wc_flags = 0;
4175 		wc->src_qp = qp->id;
4176 		wc->byte_len = 0;
4177 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4178 		wc->qp = &qp->ibqp;
4179 		num_entries--;
4180 		wc++;
4181 		cnt++;
4182 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4183 			qed_chain_consume(&qp->rq.pbl);
4184 		qedr_inc_sw_cons(&qp->rq);
4185 	}
4186 
4187 	return cnt;
4188 }
4189 
4190 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4191 				 struct rdma_cqe_responder *resp, int *update)
4192 {
4193 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4194 		consume_cqe(cq);
4195 		*update |= 1;
4196 	}
4197 }
4198 
4199 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4200 				 struct qedr_cq *cq, int num_entries,
4201 				 struct ib_wc *wc,
4202 				 struct rdma_cqe_responder *resp)
4203 {
4204 	int cnt;
4205 
4206 	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4207 	consume_cqe(cq);
4208 
4209 	return cnt;
4210 }
4211 
4212 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4213 			     struct qedr_cq *cq, int num_entries,
4214 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4215 			     int *update)
4216 {
4217 	int cnt;
4218 
4219 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4220 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4221 					 resp->rq_cons_or_srq_id);
4222 		try_consume_resp_cqe(cq, qp, resp, update);
4223 	} else {
4224 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4225 		consume_cqe(cq);
4226 		*update |= 1;
4227 	}
4228 
4229 	return cnt;
4230 }
4231 
4232 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4233 				struct rdma_cqe_requester *req, int *update)
4234 {
4235 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4236 		consume_cqe(cq);
4237 		*update |= 1;
4238 	}
4239 }
4240 
4241 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4242 {
4243 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4244 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4245 	union rdma_cqe *cqe;
4246 	u32 old_cons, new_cons;
4247 	unsigned long flags;
4248 	int update = 0;
4249 	int done = 0;
4250 
4251 	if (cq->destroyed) {
4252 		DP_ERR(dev,
4253 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4254 		       cq, cq->icid);
4255 		return 0;
4256 	}
4257 
4258 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4259 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4260 
4261 	spin_lock_irqsave(&cq->cq_lock, flags);
4262 	cqe = cq->latest_cqe;
4263 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4264 	while (num_entries && is_valid_cqe(cq, cqe)) {
4265 		struct qedr_qp *qp;
4266 		int cnt = 0;
4267 
4268 		/* prevent speculative reads of any field of CQE */
4269 		rmb();
4270 
4271 		qp = cqe_get_qp(cqe);
4272 		if (!qp) {
4273 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4274 			break;
4275 		}
4276 
4277 		wc->qp = &qp->ibqp;
4278 
4279 		switch (cqe_get_type(cqe)) {
4280 		case RDMA_CQE_TYPE_REQUESTER:
4281 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4282 					       &cqe->req);
4283 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4284 			break;
4285 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4286 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4287 						&cqe->resp, &update);
4288 			break;
4289 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4290 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4291 						    wc, &cqe->resp);
4292 			update = 1;
4293 			break;
4294 		case RDMA_CQE_TYPE_INVALID:
4295 		default:
4296 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4297 			       cqe_get_type(cqe));
4298 		}
4299 		num_entries -= cnt;
4300 		wc += cnt;
4301 		done += cnt;
4302 
4303 		cqe = get_cqe(cq);
4304 	}
4305 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4306 
4307 	cq->cq_cons += new_cons - old_cons;
4308 
4309 	if (update)
4310 		/* doorbell notifies about the latest VALID entry,
4311 		 * but the chain already points to the next INVALID one
4312 		 */
4313 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4314 
4315 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4316 	return done;
4317 }
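
/* A hypothetical sketch (kept under #if 0, not built) of a consumer
 * draining a CQ through the core verbs API; each ib_poll_cq() call ends
 * up in qedr_poll_cq() above and returns how many WCs were written into
 * the array.
 */
#if 0
static void example_drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc[8];
	int n, i;

	while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
		for (i = 0; i < n; i++) {
			if (wc[i].status != IB_WC_SUCCESS)
				pr_warn("wr_id %llu completed with status %d\n",
					wc[i].wr_id, wc[i].status);
		}
	}
}
#endif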
4318 
4319 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4320 		     u8 port_num,
4321 		     const struct ib_wc *in_wc,
4322 		     const struct ib_grh *in_grh,
4323 		     const struct ib_mad_hdr *mad_hdr,
4324 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
4325 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4326 {
4327 	struct qedr_dev *dev = get_qedr_dev(ibdev);
4328 
4329 	DP_DEBUG(dev, QEDR_MSG_GSI,
4330 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
4331 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
4332 		 mad_hdr->class_specific, mad_hdr->class_version,
4333 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
4334 	return IB_MAD_RESULT_SUCCESS;
4335 }
4336