xref: /openbmc/linux/drivers/infiniband/hw/qedr/verbs.c (revision 15e3ae36)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
55 
56 #define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
57 #define	RDMA_MAX_SGE_PER_SRQ	(4)
58 #define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
59 
60 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
61 
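/* Keys stored in the rdma_user_mmap entries created by this driver:
 * QEDR_USER_MMAP_IO_WC maps the write-combined doorbell window of a DPI,
 * while QEDR_USER_MMAP_PHYS_PAGE maps a kernel page used for doorbell
 * recovery data. See qedr_mmap() for how each key is handled.
 */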
62 enum {
63 	QEDR_USER_MMAP_IO_WC = 0,
64 	QEDR_USER_MMAP_PHYS_PAGE,
65 };
66 
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
68 					size_t len)
69 {
70 	size_t min_len = min_t(size_t, len, udata->outlen);
71 
72 	return ib_copy_to_udata(udata, src, min_len);
73 }
74 
75 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
76 {
77 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
78 		return -EINVAL;
79 
80 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
81 	return 0;
82 }
83 
84 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
85 		      int index, union ib_gid *sgid)
86 {
87 	struct qedr_dev *dev = get_qedr_dev(ibdev);
88 
89 	memset(sgid->raw, 0, sizeof(sgid->raw));
90 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
91 
92 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 		 sgid->global.interface_id, sgid->global.subnet_prefix);
94 
95 	return 0;
96 }
97 
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
99 {
100 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 	struct qedr_device_attr *qattr = &dev->attr;
102 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
103 
104 	srq_attr->srq_limit = srq->srq_limit;
105 	srq_attr->max_wr = qattr->max_srq_wr;
106 	srq_attr->max_sge = qattr->max_sge;
107 
108 	return 0;
109 }
110 
111 int qedr_query_device(struct ib_device *ibdev,
112 		      struct ib_device_attr *attr, struct ib_udata *udata)
113 {
114 	struct qedr_dev *dev = get_qedr_dev(ibdev);
115 	struct qedr_device_attr *qattr = &dev->attr;
116 
117 	if (!dev->rdma_ctx) {
118 		DP_ERR(dev,
119 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
120 		       dev->rdma_ctx);
121 		return -EINVAL;
122 	}
123 
124 	memset(attr, 0, sizeof(*attr));
125 
126 	attr->fw_ver = qattr->fw_ver;
127 	attr->sys_image_guid = qattr->sys_image_guid;
128 	attr->max_mr_size = qattr->max_mr_size;
129 	attr->page_size_cap = qattr->page_size_caps;
130 	attr->vendor_id = qattr->vendor_id;
131 	attr->vendor_part_id = qattr->vendor_part_id;
132 	attr->hw_ver = qattr->hw_ver;
133 	attr->max_qp = qattr->max_qp;
134 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 	    IB_DEVICE_RC_RNR_NAK_GEN |
137 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
138 
139 	attr->max_send_sge = qattr->max_sge;
140 	attr->max_recv_sge = qattr->max_sge;
141 	attr->max_sge_rd = qattr->max_sge;
142 	attr->max_cq = qattr->max_cq;
143 	attr->max_cqe = qattr->max_cqe;
144 	attr->max_mr = qattr->max_mr;
145 	attr->max_mw = qattr->max_mw;
146 	attr->max_pd = qattr->max_pd;
147 	attr->atomic_cap = dev->atomic_cap;
148 	attr->max_fmr = qattr->max_fmr;
149 	attr->max_map_per_fmr = 16;
150 	attr->max_qp_init_rd_atom =
151 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
152 	attr->max_qp_rd_atom =
153 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
154 		attr->max_qp_init_rd_atom);
155 
156 	attr->max_srq = qattr->max_srq;
157 	attr->max_srq_sge = qattr->max_srq_sge;
158 	attr->max_srq_wr = qattr->max_srq_wr;
159 
160 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
161 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
162 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
163 	attr->max_ah = qattr->max_ah;
164 
165 	return 0;
166 }
167 
168 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
169 					    u8 *ib_width)
170 {
171 	switch (speed) {
172 	case 1000:
173 		*ib_speed = IB_SPEED_SDR;
174 		*ib_width = IB_WIDTH_1X;
175 		break;
176 	case 10000:
177 		*ib_speed = IB_SPEED_QDR;
178 		*ib_width = IB_WIDTH_1X;
179 		break;
180 
181 	case 20000:
182 		*ib_speed = IB_SPEED_DDR;
183 		*ib_width = IB_WIDTH_4X;
184 		break;
185 
186 	case 25000:
187 		*ib_speed = IB_SPEED_EDR;
188 		*ib_width = IB_WIDTH_1X;
189 		break;
190 
191 	case 40000:
192 		*ib_speed = IB_SPEED_QDR;
193 		*ib_width = IB_WIDTH_4X;
194 		break;
195 
196 	case 50000:
197 		*ib_speed = IB_SPEED_HDR;
198 		*ib_width = IB_WIDTH_1X;
199 		break;
200 
201 	case 100000:
202 		*ib_speed = IB_SPEED_EDR;
203 		*ib_width = IB_WIDTH_4X;
204 		break;
205 
206 	default:
207 		/* Unsupported */
208 		*ib_speed = IB_SPEED_SDR;
209 		*ib_width = IB_WIDTH_1X;
210 	}
211 }
212 
213 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
214 {
215 	struct qedr_dev *dev;
216 	struct qed_rdma_port *rdma_port;
217 
218 	dev = get_qedr_dev(ibdev);
219 
220 	if (!dev->rdma_ctx) {
221 		DP_ERR(dev, "rdma_ctx is NULL\n");
222 		return -EINVAL;
223 	}
224 
225 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
226 
227 	/* *attr is already zeroed by the caller; avoid zeroing it here */
228 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
229 		attr->state = IB_PORT_ACTIVE;
230 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
231 	} else {
232 		attr->state = IB_PORT_DOWN;
233 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
234 	}
235 	attr->max_mtu = IB_MTU_4096;
236 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
237 	attr->lid = 0;
238 	attr->lmc = 0;
239 	attr->sm_lid = 0;
240 	attr->sm_sl = 0;
241 	attr->ip_gids = true;
242 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
243 		attr->gid_tbl_len = 1;
244 		attr->pkey_tbl_len = 1;
245 	} else {
246 		attr->gid_tbl_len = QEDR_MAX_SGID;
247 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
248 	}
249 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
250 	attr->qkey_viol_cntr = 0;
251 	get_link_speed_and_width(rdma_port->link_speed,
252 				 &attr->active_speed, &attr->active_width);
253 	attr->max_msg_sz = rdma_port->max_msg_size;
254 	attr->max_vl_num = 4;
255 
256 	return 0;
257 }
258 
259 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
260 {
261 	struct ib_device *ibdev = uctx->device;
262 	int rc;
263 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
264 	struct qedr_alloc_ucontext_resp uresp = {};
265 	struct qedr_alloc_ucontext_req ureq = {};
266 	struct qedr_dev *dev = get_qedr_dev(ibdev);
267 	struct qed_rdma_add_user_out_params oparams;
268 	struct qedr_user_mmap_entry *entry;
269 
270 	if (!udata)
271 		return -EFAULT;
272 
273 	if (udata->inlen) {
274 		rc = ib_copy_from_udata(&ureq, udata,
275 					min(sizeof(ureq), udata->inlen));
276 		if (rc) {
277 			DP_ERR(dev, "Problem copying data from user space\n");
278 			return -EFAULT;
279 		}
280 
281 		ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
282 	}
283 
284 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
285 	if (rc) {
286 		DP_ERR(dev,
287 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
288 		       rc);
289 		return rc;
290 	}
291 
292 	ctx->dpi = oparams.dpi;
293 	ctx->dpi_addr = oparams.dpi_addr;
294 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
295 	ctx->dpi_size = oparams.dpi_size;
296 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
297 	if (!entry) {
298 		rc = -ENOMEM;
299 		goto err;
300 	}
301 
302 	entry->io_address = ctx->dpi_phys_addr;
303 	entry->length = ctx->dpi_size;
304 	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
305 	entry->dpi = ctx->dpi;
306 	entry->dev = dev;
307 	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
308 					 ctx->dpi_size);
309 	if (rc) {
310 		kfree(entry);
311 		goto err;
312 	}
313 	ctx->db_mmap_entry = &entry->rdma_entry;
314 
315 	if (!dev->user_dpm_enabled)
316 		uresp.dpm_flags = 0;
317 	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
318 		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
319 	else
320 		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
321 				  QEDR_DPM_TYPE_ROCE_LEGACY;
322 
323 	uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
324 	uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
325 	uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
326 
327 	uresp.wids_enabled = 1;
328 	uresp.wid_count = oparams.wid_count;
329 	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
330 	uresp.db_size = ctx->dpi_size;
331 	uresp.max_send_wr = dev->attr.max_sqe;
332 	uresp.max_recv_wr = dev->attr.max_rqe;
333 	uresp.max_srq_wr = dev->attr.max_srq_wr;
334 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
335 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
336 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
337 	uresp.max_cqes = QEDR_MAX_CQES;
338 
339 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
340 	if (rc)
341 		goto err;
342 
343 	ctx->dev = dev;
344 
345 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
346 		 &ctx->ibucontext);
347 	return 0;
348 
349 err:
350 	if (!ctx->db_mmap_entry)
351 		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
352 	else
353 		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
354 
355 	return rc;
356 }
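
/* Note on the response above: uresp.db_pa is not a physical address but the
 * opaque mmap offset produced by rdma_user_mmap_get_offset(); user space is
 * expected to hand it back as the offset argument of mmap() on the uverbs
 * fd, which ends up in qedr_mmap() below.
 */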
357 
358 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
359 {
360 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
361 
362 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
363 		 uctx);
364 
365 	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
366 }
367 
368 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
369 {
370 	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
371 	struct qedr_dev *dev = entry->dev;
372 
373 	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
374 		free_page((unsigned long)entry->address);
375 	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
376 		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
377 
378 	kfree(entry);
379 }
380 
381 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
382 {
383 	struct ib_device *dev = ucontext->device;
384 	size_t length = vma->vm_end - vma->vm_start;
385 	struct rdma_user_mmap_entry *rdma_entry;
386 	struct qedr_user_mmap_entry *entry;
387 	int rc = 0;
388 	u64 pfn;
389 
390 	ibdev_dbg(dev,
391 		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
392 		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
393 
394 	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
395 	if (!rdma_entry) {
396 		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
397 			  vma->vm_pgoff);
398 		return -EINVAL;
399 	}
400 	entry = get_qedr_mmap_entry(rdma_entry);
401 	ibdev_dbg(dev,
402 		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
403 		  entry->io_address, length, entry->mmap_flag);
404 
405 	switch (entry->mmap_flag) {
406 	case QEDR_USER_MMAP_IO_WC:
407 		pfn = entry->io_address >> PAGE_SHIFT;
408 		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
409 				       pgprot_writecombine(vma->vm_page_prot),
410 				       rdma_entry);
411 		break;
412 	case QEDR_USER_MMAP_PHYS_PAGE:
413 		rc = vm_insert_page(vma, vma->vm_start,
414 				    virt_to_page(entry->address));
415 		break;
416 	default:
417 		rc = -EINVAL;
418 	}
419 
420 	if (rc)
421 		ibdev_dbg(dev,
422 			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
423 			  entry->io_address, length, entry->mmap_flag, rc);
424 
425 	rdma_user_mmap_entry_put(rdma_entry);
426 	return rc;
427 }
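
/* Illustration only (not part of the driver): a user-space provider would
 * typically map the doorbell window roughly as
 *
 *	db = mmap(NULL, uresp.db_size, PROT_WRITE, MAP_SHARED,
 *		  cmd_fd, uresp.db_pa);
 *
 * so that vma->vm_pgoff matches the entry inserted in qedr_alloc_ucontext().
 * The exact call sequence lives in the rdma-core qedr provider, not here.
 */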
428 
429 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
430 {
431 	struct ib_device *ibdev = ibpd->device;
432 	struct qedr_dev *dev = get_qedr_dev(ibdev);
433 	struct qedr_pd *pd = get_qedr_pd(ibpd);
434 	u16 pd_id;
435 	int rc;
436 
437 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
438 		 udata ? "User Lib" : "Kernel");
439 
440 	if (!dev->rdma_ctx) {
441 		DP_ERR(dev, "invalid RDMA context\n");
442 		return -EINVAL;
443 	}
444 
445 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
446 	if (rc)
447 		return rc;
448 
449 	pd->pd_id = pd_id;
450 
451 	if (udata) {
452 		struct qedr_alloc_pd_uresp uresp = {
453 			.pd_id = pd_id,
454 		};
455 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
456 			udata, struct qedr_ucontext, ibucontext);
457 
458 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
459 		if (rc) {
460 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
461 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
462 			return rc;
463 		}
464 
465 		pd->uctx = context;
466 		pd->uctx->pd = pd;
467 	}
468 
469 	return 0;
470 }
471 
472 void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
473 {
474 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
475 	struct qedr_pd *pd = get_qedr_pd(ibpd);
476 
477 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
478 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
479 }
480 
481 static void qedr_free_pbl(struct qedr_dev *dev,
482 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
483 {
484 	struct pci_dev *pdev = dev->pdev;
485 	int i;
486 
487 	for (i = 0; i < pbl_info->num_pbls; i++) {
488 		if (!pbl[i].va)
489 			continue;
490 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
491 				  pbl[i].va, pbl[i].pa);
492 	}
493 
494 	kfree(pbl);
495 }
496 
497 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
498 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
499 
500 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
501 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
502 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
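/* Arithmetic behind the limits above: a PBE is 8 bytes (u64), so a 4 KB PBL
 * page holds 512 entries and a 64 KB page holds 8192. A two-layer table can
 * therefore address at most 8192 * 8192 = 67,108,864 FW pages.
 */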
503 
504 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
505 					   struct qedr_pbl_info *pbl_info,
506 					   gfp_t flags)
507 {
508 	struct pci_dev *pdev = dev->pdev;
509 	struct qedr_pbl *pbl_table;
510 	dma_addr_t *pbl_main_tbl;
511 	dma_addr_t pa;
512 	void *va;
513 	int i;
514 
515 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
516 	if (!pbl_table)
517 		return ERR_PTR(-ENOMEM);
518 
519 	for (i = 0; i < pbl_info->num_pbls; i++) {
520 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
521 					flags);
522 		if (!va)
523 			goto err;
524 
525 		pbl_table[i].va = va;
526 		pbl_table[i].pa = pa;
527 	}
528 
529 	/* Two-layer PBLs: if we have more than one PBL, initialize the first
530 	 * one with physical pointers to all of the rest.
531 	 */
532 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
533 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
534 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
535 
536 	return pbl_table;
537 
538 err:
539 	for (i--; i >= 0; i--)
540 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
541 				  pbl_table[i].va, pbl_table[i].pa);
542 
543 	qedr_free_pbl(dev, pbl_info, pbl_table);
544 
545 	return ERR_PTR(-ENOMEM);
546 }
547 
548 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
549 				struct qedr_pbl_info *pbl_info,
550 				u32 num_pbes, int two_layer_capable)
551 {
552 	u32 pbl_capacity;
553 	u32 pbl_size;
554 	u32 num_pbls;
555 
556 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
557 		if (num_pbes > MAX_PBES_TWO_LAYER) {
558 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
559 			       num_pbes);
560 			return -EINVAL;
561 		}
562 
563 		/* calculate required pbl page size */
564 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
565 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
566 			       NUM_PBES_ON_PAGE(pbl_size);
567 
568 		while (pbl_capacity < num_pbes) {
569 			pbl_size *= 2;
570 			pbl_capacity = pbl_size / sizeof(u64);
571 			pbl_capacity = pbl_capacity * pbl_capacity;
572 		}
573 
574 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
575 		num_pbls++;	/* One for layer 0 (points to the PBLs) */
576 		pbl_info->two_layered = true;
577 	} else {
578 		/* One layered PBL */
579 		num_pbls = 1;
580 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
581 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
582 		pbl_info->two_layered = false;
583 	}
584 
585 	pbl_info->num_pbls = num_pbls;
586 	pbl_info->pbl_size = pbl_size;
587 	pbl_info->num_pbes = num_pbes;
588 
589 	DP_DEBUG(dev, QEDR_MSG_MR,
590 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
591 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
592 
593 	return 0;
594 }
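
/* Worked example (illustrative): for num_pbes = 1,000,000 with two-layer
 * support, a 4 KB PBL page covers only 512 * 512 = 262,144 PBEs, so pbl_size
 * doubles to 8 KB, where 1024 * 1024 = 1,048,576 is enough; num_pbls then
 * becomes DIV_ROUND_UP(1,000,000, 1024) + 1 = 977 + 1 = 978.
 */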
595 
596 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
597 			       struct qedr_pbl *pbl,
598 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
599 {
600 	int pbe_cnt, total_num_pbes = 0;
601 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
602 	struct qedr_pbl *pbl_tbl;
603 	struct sg_dma_page_iter sg_iter;
604 	struct regpair *pbe;
605 	u64 pg_addr;
606 
607 	if (!pbl_info->num_pbes)
608 		return;
609 
610 	/* With a two-layered PBL, the first PBL points to the rest of the
611 	 * PBLs and the first data entry lies in the second PBL of the table.
612 	 */
613 	if (pbl_info->two_layered)
614 		pbl_tbl = &pbl[1];
615 	else
616 		pbl_tbl = pbl;
617 
618 	pbe = (struct regpair *)pbl_tbl->va;
619 	if (!pbe) {
620 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
621 		return;
622 	}
623 
624 	pbe_cnt = 0;
625 
626 	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
627 
628 	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
629 		pg_addr = sg_page_iter_dma_address(&sg_iter);
630 		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
631 			pbe->lo = cpu_to_le32(pg_addr);
632 			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
633 
634 			pg_addr += BIT(pg_shift);
635 			pbe_cnt++;
636 			total_num_pbes++;
637 			pbe++;
638 
639 			if (total_num_pbes == pbl_info->num_pbes)
640 				return;
641 
642 			/* If the given PBL is full of PBEs, move to the
643 			 * next PBL.
644 			 */
645 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
646 				pbl_tbl++;
647 				pbe = (struct regpair *)pbl_tbl->va;
648 				pbe_cnt = 0;
649 			}
650 
651 			fw_pg_cnt++;
652 		}
653 	}
654 }
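
/* Note on fw_pg_per_umem_pg above: BIT(PAGE_SHIFT - pg_shift) is the number
 * of FW-sized pages per kernel/umem page. With 4 KB kernel pages and a 4 KB
 * FW page size it is 1; with 64 KB kernel pages and 4 KB FW pages it would
 * be 16, i.e. 16 consecutive PBEs are written per umem page.
 */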
655 
656 static int qedr_db_recovery_add(struct qedr_dev *dev,
657 				void __iomem *db_addr,
658 				void *db_data,
659 				enum qed_db_rec_width db_width,
660 				enum qed_db_rec_space db_space)
661 {
662 	if (!db_data) {
663 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
664 		return 0;
665 	}
666 
667 	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
668 						 db_width, db_space);
669 }
670 
671 static void qedr_db_recovery_del(struct qedr_dev *dev,
672 				 void __iomem *db_addr,
673 				 void *db_data)
674 {
675 	if (!db_data) {
676 		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
677 		return;
678 	}
679 
680 	/* Ignore return code as there is not much we can do about it. Error
681 	 * log will be printed inside.
682 	 */
683 	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
684 }
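
/* Doorbell recovery: the qed core records (db_addr, db_data) pairs so that
 * the last doorbell value can be replayed after a doorbell-queue overflow.
 * A NULL db_data means the user library predates doorbell recovery and did
 * not allocate a recovery page, so registration is skipped.
 */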
685 
686 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
687 			      struct qedr_cq *cq, struct ib_udata *udata,
688 			      u32 db_offset)
689 {
690 	struct qedr_create_cq_uresp uresp;
691 	int rc;
692 
693 	memset(&uresp, 0, sizeof(uresp));
694 
695 	uresp.db_offset = db_offset;
696 	uresp.icid = cq->icid;
697 	if (cq->q.db_mmap_entry)
698 		uresp.db_rec_addr =
699 			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
700 
701 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
702 	if (rc)
703 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
704 
705 	return rc;
706 }
707 
708 static void consume_cqe(struct qedr_cq *cq)
709 {
710 	if (cq->latest_cqe == cq->toggle_cqe)
711 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
712 
713 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
714 }
715 
716 static inline int qedr_align_cq_entries(int entries)
717 {
718 	u64 size, aligned_size;
719 
720 	/* We allocate an extra entry that we don't report to the FW. */
721 	size = (entries + 1) * QEDR_CQE_SIZE;
722 	aligned_size = ALIGN(size, PAGE_SIZE);
723 
724 	return aligned_size / QEDR_CQE_SIZE;
725 }
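
/* Example (assuming a 32-byte CQE): a request for 128 entries becomes
 * (128 + 1) * 32 = 4128 bytes, aligned up to 8192 bytes, i.e. 256 usable
 * entries; the extra entry added above is never reported to the FW.
 */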
726 
727 static int qedr_init_user_db_rec(struct ib_udata *udata,
728 				 struct qedr_dev *dev, struct qedr_userq *q,
729 				 bool requires_db_rec)
730 {
731 	struct qedr_ucontext *uctx =
732 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
733 					  ibucontext);
734 	struct qedr_user_mmap_entry *entry;
735 	int rc;
736 
737 	/* Abort for a non-doorbell user queue (SRQ) or a non-supporting lib */
738 	if (requires_db_rec == 0 || !uctx->db_rec)
739 		return 0;
740 
741 	/* Allocate a page for doorbell recovery, add to mmap */
742 	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
743 	if (!q->db_rec_data) {
744 		DP_ERR(dev, "get_zeroed_page failed\n");
745 		return -ENOMEM;
746 	}
747 
748 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
749 	if (!entry)
750 		goto err_free_db_data;
751 
752 	entry->address = q->db_rec_data;
753 	entry->length = PAGE_SIZE;
754 	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
755 	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
756 					 &entry->rdma_entry,
757 					 PAGE_SIZE);
758 	if (rc)
759 		goto err_free_entry;
760 
761 	q->db_mmap_entry = &entry->rdma_entry;
762 
763 	return 0;
764 
765 err_free_entry:
766 	kfree(entry);
767 
768 err_free_db_data:
769 	free_page((unsigned long)q->db_rec_data);
770 	q->db_rec_data = NULL;
771 	return -ENOMEM;
772 }
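
/* The zeroed page allocated above holds the doorbell value last written by
 * user space; it is exposed to the user library through a
 * QEDR_USER_MMAP_PHYS_PAGE entry (see qedr_mmap()) and released in
 * qedr_mmap_free() once the mapping goes away.
 */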
773 
774 static inline int qedr_init_user_queue(struct ib_udata *udata,
775 				       struct qedr_dev *dev,
776 				       struct qedr_userq *q, u64 buf_addr,
777 				       size_t buf_len, bool requires_db_rec,
778 				       int access,
779 				       int alloc_and_init)
780 {
781 	u32 fw_pages;
782 	int rc;
783 
784 	q->buf_addr = buf_addr;
785 	q->buf_len = buf_len;
786 	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
787 	if (IS_ERR(q->umem)) {
788 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
789 		       PTR_ERR(q->umem));
790 		return PTR_ERR(q->umem);
791 	}
792 
793 	fw_pages = ib_umem_page_count(q->umem) <<
794 	    (PAGE_SHIFT - FW_PAGE_SHIFT);
795 
796 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
797 	if (rc)
798 		goto err0;
799 
800 	if (alloc_and_init) {
801 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
802 		if (IS_ERR(q->pbl_tbl)) {
803 			rc = PTR_ERR(q->pbl_tbl);
804 			goto err0;
805 		}
806 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
807 				   FW_PAGE_SHIFT);
808 	} else {
809 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
810 		if (!q->pbl_tbl) {
811 			rc = -ENOMEM;
812 			goto err0;
813 		}
814 	}
815 
816 	/* mmap the user address used to store doorbell data for recovery */
817 	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
818 
819 err0:
820 	ib_umem_release(q->umem);
821 	q->umem = NULL;
822 
823 	return rc;
824 }
825 
826 static inline void qedr_init_cq_params(struct qedr_cq *cq,
827 				       struct qedr_ucontext *ctx,
828 				       struct qedr_dev *dev, int vector,
829 				       int chain_entries, int page_cnt,
830 				       u64 pbl_ptr,
831 				       struct qed_rdma_create_cq_in_params
832 				       *params)
833 {
834 	memset(params, 0, sizeof(*params));
835 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
836 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
837 	params->cnq_id = vector;
838 	params->cq_size = chain_entries - 1;
839 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
840 	params->pbl_num_pages = page_cnt;
841 	params->pbl_ptr = pbl_ptr;
842 	params->pbl_two_level = 0;
843 }
844 
845 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
846 {
847 	cq->db.data.agg_flags = flags;
848 	cq->db.data.value = cpu_to_le32(cons);
849 	writeq(cq->db.raw, cq->db_addr);
850 }
851 
852 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
853 {
854 	struct qedr_cq *cq = get_qedr_cq(ibcq);
855 	unsigned long sflags;
856 	struct qedr_dev *dev;
857 
858 	dev = get_qedr_dev(ibcq->device);
859 
860 	if (cq->destroyed) {
861 		DP_ERR(dev,
862 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
863 		       cq, cq->icid);
864 		return -EINVAL;
865 	}
866 
868 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
869 		return 0;
870 
871 	spin_lock_irqsave(&cq->cq_lock, sflags);
872 
873 	cq->arm_flags = 0;
874 
875 	if (flags & IB_CQ_SOLICITED)
876 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
877 
878 	if (flags & IB_CQ_NEXT_COMP)
879 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
880 
881 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
882 
883 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
884 
885 	return 0;
886 }
887 
888 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
889 		   struct ib_udata *udata)
890 {
891 	struct ib_device *ibdev = ibcq->device;
892 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
893 		udata, struct qedr_ucontext, ibucontext);
894 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
895 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
896 	struct qedr_dev *dev = get_qedr_dev(ibdev);
897 	struct qed_rdma_create_cq_in_params params;
898 	struct qedr_create_cq_ureq ureq = {};
899 	int vector = attr->comp_vector;
900 	int entries = attr->cqe;
901 	struct qedr_cq *cq = get_qedr_cq(ibcq);
902 	int chain_entries;
903 	u32 db_offset;
904 	int page_cnt;
905 	u64 pbl_ptr;
906 	u16 icid;
907 	int rc;
908 
909 	DP_DEBUG(dev, QEDR_MSG_INIT,
910 		 "create_cq: called from %s. entries=%d, vector=%d\n",
911 		 udata ? "User Lib" : "Kernel", entries, vector);
912 
913 	if (entries > QEDR_MAX_CQES) {
914 		DP_ERR(dev,
915 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
916 		       entries, QEDR_MAX_CQES);
917 		return -EINVAL;
918 	}
919 
920 	chain_entries = qedr_align_cq_entries(entries);
921 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
922 
923 	/* calc db offset. user will add DPI base, kernel will add db addr */
924 	db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
925 
926 	if (udata) {
927 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
928 							 udata->inlen))) {
929 			DP_ERR(dev,
930 			       "create cq: problem copying data from user space\n");
931 			goto err0;
932 		}
933 
934 		if (!ureq.len) {
935 			DP_ERR(dev,
936 			       "create cq: cannot create a cq with 0 entries\n");
937 			goto err0;
938 		}
939 
940 		cq->cq_type = QEDR_CQ_TYPE_USER;
941 
942 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
943 					  ureq.len, true, IB_ACCESS_LOCAL_WRITE,
944 					  1);
945 		if (rc)
946 			goto err0;
947 
948 		pbl_ptr = cq->q.pbl_tbl->pa;
949 		page_cnt = cq->q.pbl_info.num_pbes;
950 
951 		cq->ibcq.cqe = chain_entries;
952 		cq->q.db_addr = ctx->dpi_addr + db_offset;
953 	} else {
954 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
955 
956 		rc = dev->ops->common->chain_alloc(dev->cdev,
957 						   QED_CHAIN_USE_TO_CONSUME,
958 						   QED_CHAIN_MODE_PBL,
959 						   QED_CHAIN_CNT_TYPE_U32,
960 						   chain_entries,
961 						   sizeof(union rdma_cqe),
962 						   &cq->pbl, NULL);
963 		if (rc)
964 			goto err0;
965 
966 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
967 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
968 		cq->ibcq.cqe = cq->pbl.capacity;
969 	}
970 
971 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
972 			    pbl_ptr, &params);
973 
974 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
975 	if (rc)
976 		goto err1;
977 
978 	cq->icid = icid;
979 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
980 	spin_lock_init(&cq->cq_lock);
981 
982 	if (udata) {
983 		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
984 		if (rc)
985 			goto err2;
986 
987 		rc = qedr_db_recovery_add(dev, cq->q.db_addr,
988 					  &cq->q.db_rec_data->db_data,
989 					  DB_REC_WIDTH_64B,
990 					  DB_REC_USER);
991 		if (rc)
992 			goto err2;
993 
994 	} else {
995 		/* Generate doorbell address. */
996 		cq->db.data.icid = cq->icid;
997 		cq->db_addr = dev->db_addr + db_offset;
998 		cq->db.data.params = DB_AGG_CMD_SET <<
999 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1000 
1001 		/* Point to the very last element; once we pass it, we toggle. */
1002 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1003 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1004 		cq->latest_cqe = NULL;
1005 		consume_cqe(cq);
1006 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1007 
1008 		rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1009 					  DB_REC_WIDTH_64B, DB_REC_KERNEL);
1010 		if (rc)
1011 			goto err2;
1012 	}
1013 
1014 	DP_DEBUG(dev, QEDR_MSG_CQ,
1015 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1016 		 cq->icid, cq, params.cq_size);
1017 
1018 	return 0;
1019 
1020 err2:
1021 	destroy_iparams.icid = cq->icid;
1022 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1023 				  &destroy_oparams);
1024 err1:
1025 	if (udata) {
1026 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1027 		ib_umem_release(cq->q.umem);
1028 		if (cq->q.db_mmap_entry)
1029 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1030 	} else {
1031 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1032 	}
1033 err0:
1034 	return -EINVAL;
1035 }
1036 
1037 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1038 {
1039 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1040 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1041 
1042 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1043 
1044 	return 0;
1045 }
1046 
1047 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
1048 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
1049 
1050 void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1051 {
1052 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1053 	struct qed_rdma_destroy_cq_out_params oparams;
1054 	struct qed_rdma_destroy_cq_in_params iparams;
1055 	struct qedr_cq *cq = get_qedr_cq(ibcq);
1056 	int iter;
1057 
1058 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1059 
1060 	cq->destroyed = 1;
1061 
1062 	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1063 	if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1064 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1065 		return;
1066 	}
1067 
1068 	iparams.icid = cq->icid;
1069 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1070 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1071 
1072 	if (udata) {
1073 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1074 		ib_umem_release(cq->q.umem);
1075 
1076 		if (cq->q.db_rec_data) {
1077 			qedr_db_recovery_del(dev, cq->q.db_addr,
1078 					     &cq->q.db_rec_data->db_data);
1079 			rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1080 		}
1081 	} else {
1082 		qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1083 	}
1084 
1085 	/* We don't want the IRQ handler to handle a non-existing CQ so we
1086 	 * wait until all CNQ interrupts, if any, are received. This will always
1087 	 * happen and will always happen very fast. If not, then a serious error
1088 	 * has occurred. That is why we can use a long delay.
1089 	 * We spin for a short time so we don't lose time on context switching
1090 	 * in case all the completions are handled in that span. Otherwise
1091 	 * we sleep for a while and check again. Since the CNQ may be
1092 	 * associated with (only) the current CPU we use msleep to allow the
1093 	 * current CPU to be freed.
1094 	 * The CNQ notification is increased in qedr_irq_handler().
1095 	 */
1096 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1097 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1098 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1099 		iter--;
1100 	}
1101 
1102 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1103 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1104 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1105 		iter--;
1106 	}
1107 
1108 	/* Note that we don't need to have explicit code to wait for the
1109 	 * completion of the event handler because it is invoked from the EQ.
1110 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1111 	 * be certain that there's no event handler in process.
1112 	 */
1113 }
1114 
1115 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1116 					  struct ib_qp_attr *attr,
1117 					  int attr_mask,
1118 					  struct qed_rdma_modify_qp_in_params
1119 					  *qp_params)
1120 {
1121 	const struct ib_gid_attr *gid_attr;
1122 	enum rdma_network_type nw_type;
1123 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1124 	u32 ipv4_addr;
1125 	int ret;
1126 	int i;
1127 
1128 	gid_attr = grh->sgid_attr;
1129 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1130 	if (ret)
1131 		return ret;
1132 
1133 	nw_type = rdma_gid_attr_network_type(gid_attr);
1134 	switch (nw_type) {
1135 	case RDMA_NETWORK_IPV6:
1136 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1137 		       sizeof(qp_params->sgid));
1138 		memcpy(&qp_params->dgid.bytes[0],
1139 		       &grh->dgid,
1140 		       sizeof(qp_params->dgid));
1141 		qp_params->roce_mode = ROCE_V2_IPV6;
1142 		SET_FIELD(qp_params->modify_flags,
1143 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1144 		break;
1145 	case RDMA_NETWORK_IB:
1146 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1147 		       sizeof(qp_params->sgid));
1148 		memcpy(&qp_params->dgid.bytes[0],
1149 		       &grh->dgid,
1150 		       sizeof(qp_params->dgid));
1151 		qp_params->roce_mode = ROCE_V1;
1152 		break;
1153 	case RDMA_NETWORK_IPV4:
1154 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1155 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1156 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1157 		qp_params->sgid.ipv4_addr = ipv4_addr;
1158 		ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
1160 		qp_params->dgid.ipv4_addr = ipv4_addr;
1161 		SET_FIELD(qp_params->modify_flags,
1162 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1163 		qp_params->roce_mode = ROCE_V2_IPV4;
1164 		break;
1165 	}
1166 
1167 	for (i = 0; i < 4; i++) {
1168 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1169 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1170 	}
1171 
1172 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1173 		qp_params->vlan_id = 0;
1174 
1175 	return 0;
1176 }
1177 
1178 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1179 			       struct ib_qp_init_attr *attrs,
1180 			       struct ib_udata *udata)
1181 {
1182 	struct qedr_device_attr *qattr = &dev->attr;
1183 
1184 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1185 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1186 		DP_DEBUG(dev, QEDR_MSG_QP,
1187 			 "create qp: unsupported qp type=0x%x requested\n",
1188 			 attrs->qp_type);
1189 		return -EOPNOTSUPP;
1190 	}
1191 
1192 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1193 		DP_ERR(dev,
1194 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1195 		       attrs->cap.max_send_wr, qattr->max_sqe);
1196 		return -EINVAL;
1197 	}
1198 
1199 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1200 		DP_ERR(dev,
1201 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1202 		       attrs->cap.max_inline_data, qattr->max_inline);
1203 		return -EINVAL;
1204 	}
1205 
1206 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1207 		DP_ERR(dev,
1208 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1209 		       attrs->cap.max_send_sge, qattr->max_sge);
1210 		return -EINVAL;
1211 	}
1212 
1213 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1214 		DP_ERR(dev,
1215 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1216 		       attrs->cap.max_recv_sge, qattr->max_sge);
1217 		return -EINVAL;
1218 	}
1219 
1220 	/* Unprivileged user space cannot create special QP */
1221 	if (udata && attrs->qp_type == IB_QPT_GSI) {
1222 		DP_ERR(dev,
1223 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1224 		       attrs->qp_type);
1225 		return -EINVAL;
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1232 			       struct qedr_srq *srq, struct ib_udata *udata)
1233 {
1234 	struct qedr_create_srq_uresp uresp = {};
1235 	int rc;
1236 
1237 	uresp.srq_id = srq->srq_id;
1238 
1239 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1240 	if (rc)
1241 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1242 
1243 	return rc;
1244 }
1245 
1246 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1247 			      struct qedr_create_qp_uresp *uresp,
1248 			      struct qedr_qp *qp)
1249 {
1250 	/* iWARP requires two doorbells per RQ. */
1251 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1252 		uresp->rq_db_offset =
1253 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1254 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1255 	} else {
1256 		uresp->rq_db_offset =
1257 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1258 	}
1259 
1260 	uresp->rq_icid = qp->icid;
1261 	if (qp->urq.db_mmap_entry)
1262 		uresp->rq_db_rec_addr =
1263 			rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
1264 }
1265 
1266 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1267 			       struct qedr_create_qp_uresp *uresp,
1268 			       struct qedr_qp *qp)
1269 {
1270 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1271 
1272 	/* iWARP uses the same cid for rq and sq */
1273 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1274 		uresp->sq_icid = qp->icid;
1275 	else
1276 		uresp->sq_icid = qp->icid + 1;
1277 
1278 	if (qp->usq.db_mmap_entry)
1279 		uresp->sq_db_rec_addr =
1280 			rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
1281 }
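
/* RoCE uses two consecutive icids per QP (RQ: qp->icid, SQ: qp->icid + 1;
 * see also qedr_set_roce_db_info()), whereas iWARP uses a single cid for
 * both queues.
 */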
1282 
1283 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1284 			      struct qedr_qp *qp, struct ib_udata *udata,
1285 			      struct qedr_create_qp_uresp *uresp)
1286 {
1287 	int rc;
1288 
1289 	memset(uresp, 0, sizeof(*uresp));
1290 	qedr_copy_sq_uresp(dev, uresp, qp);
1291 	qedr_copy_rq_uresp(dev, uresp, qp);
1292 
1293 	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1294 	uresp->qp_id = qp->qp_id;
1295 
1296 	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1297 	if (rc)
1298 		DP_ERR(dev,
1299 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1300 		       qp->icid);
1301 
1302 	return rc;
1303 }
1304 
1305 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1306 				      struct qedr_qp *qp,
1307 				      struct qedr_pd *pd,
1308 				      struct ib_qp_init_attr *attrs)
1309 {
1310 	spin_lock_init(&qp->q_lock);
1311 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1312 		kref_init(&qp->refcnt);
1313 		init_completion(&qp->iwarp_cm_comp);
1314 	}
1315 	qp->pd = pd;
1316 	qp->qp_type = attrs->qp_type;
1317 	qp->max_inline_data = attrs->cap.max_inline_data;
1318 	qp->sq.max_sges = attrs->cap.max_send_sge;
1319 	qp->state = QED_ROCE_QP_STATE_RESET;
1320 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1321 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1322 	qp->dev = dev;
1323 
1324 	if (attrs->srq) {
1325 		qp->srq = get_qedr_srq(attrs->srq);
1326 	} else {
1327 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1328 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1329 		DP_DEBUG(dev, QEDR_MSG_QP,
1330 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1331 			 qp->rq.max_sges, qp->rq_cq->icid);
1332 	}
1333 
1334 	DP_DEBUG(dev, QEDR_MSG_QP,
1335 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1336 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1337 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1338 	DP_DEBUG(dev, QEDR_MSG_QP,
1339 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1340 		 qp->sq.max_sges, qp->sq_cq->icid);
1341 }
1342 
1343 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1344 {
1345 	int rc;
1346 
1347 	qp->sq.db = dev->db_addr +
1348 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1349 	qp->sq.db_data.data.icid = qp->icid + 1;
1350 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1351 				  &qp->sq.db_data,
1352 				  DB_REC_WIDTH_32B,
1353 				  DB_REC_KERNEL);
1354 	if (rc)
1355 		return rc;
1356 
1357 	if (!qp->srq) {
1358 		qp->rq.db = dev->db_addr +
1359 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1360 		qp->rq.db_data.data.icid = qp->icid;
1361 
1362 		rc = qedr_db_recovery_add(dev, qp->rq.db,
1363 					  &qp->rq.db_data,
1364 					  DB_REC_WIDTH_32B,
1365 					  DB_REC_KERNEL);
1366 		if (rc)
1367 			qedr_db_recovery_del(dev, qp->sq.db,
1368 					     &qp->sq.db_data);
1369 	}
1370 
1371 	return rc;
1372 }
1373 
1374 static int qedr_check_srq_params(struct qedr_dev *dev,
1375 				 struct ib_srq_init_attr *attrs,
1376 				 struct ib_udata *udata)
1377 {
1378 	struct qedr_device_attr *qattr = &dev->attr;
1379 
1380 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1381 		DP_ERR(dev,
1382 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1383 		       attrs->attr.max_wr, qattr->max_srq_wr);
1384 		return -EINVAL;
1385 	}
1386 
1387 	if (attrs->attr.max_sge > qattr->max_sge) {
1388 		DP_ERR(dev,
1389 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1390 		       attrs->attr.max_sge, qattr->max_sge);
1391 		return -EINVAL;
1392 	}
1393 
1394 	return 0;
1395 }
1396 
1397 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1398 {
1399 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1400 	ib_umem_release(srq->usrq.umem);
1401 	ib_umem_release(srq->prod_umem);
1402 }
1403 
1404 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1405 {
1406 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1407 	struct qedr_dev *dev = srq->dev;
1408 
1409 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1410 
1411 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1412 			  hw_srq->virt_prod_pair_addr,
1413 			  hw_srq->phy_prod_pair_addr);
1414 }
1415 
1416 static int qedr_init_srq_user_params(struct ib_udata *udata,
1417 				     struct qedr_srq *srq,
1418 				     struct qedr_create_srq_ureq *ureq,
1419 				     int access)
1420 {
1421 	struct scatterlist *sg;
1422 	int rc;
1423 
1424 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1425 				  ureq->srq_len, false, access, 1);
1426 	if (rc)
1427 		return rc;
1428 
1429 	srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
1430 				     sizeof(struct rdma_srq_producers), access);
1431 	if (IS_ERR(srq->prod_umem)) {
1432 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1433 		ib_umem_release(srq->usrq.umem);
1434 		DP_ERR(srq->dev,
1435 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1436 		       PTR_ERR(srq->prod_umem));
1437 		return PTR_ERR(srq->prod_umem);
1438 	}
1439 
1440 	sg = srq->prod_umem->sg_head.sgl;
1441 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1442 
1443 	return 0;
1444 }
1445 
1446 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1447 					struct qedr_dev *dev,
1448 					struct ib_srq_init_attr *init_attr)
1449 {
1450 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1451 	dma_addr_t phy_prod_pair_addr;
1452 	u32 num_elems;
1453 	void *va;
1454 	int rc;
1455 
1456 	va = dma_alloc_coherent(&dev->pdev->dev,
1457 				sizeof(struct rdma_srq_producers),
1458 				&phy_prod_pair_addr, GFP_KERNEL);
1459 	if (!va) {
1460 		DP_ERR(dev,
1461 		       "create srq: failed to allocate dma memory for producer\n");
1462 		return -ENOMEM;
1463 	}
1464 
1465 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1466 	hw_srq->virt_prod_pair_addr = va;
1467 
1468 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1469 	rc = dev->ops->common->chain_alloc(dev->cdev,
1470 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1471 					   QED_CHAIN_MODE_PBL,
1472 					   QED_CHAIN_CNT_TYPE_U32,
1473 					   num_elems,
1474 					   QEDR_SRQ_WQE_ELEM_SIZE,
1475 					   &hw_srq->pbl, NULL);
1476 	if (rc)
1477 		goto err0;
1478 
1479 	hw_srq->num_elems = num_elems;
1480 
1481 	return 0;
1482 
1483 err0:
1484 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1485 			  va, phy_prod_pair_addr);
1486 	return rc;
1487 }
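
/* Chain sizing note: num_elems = max_wr * RDMA_MAX_SRQ_WQE_SIZE reserves
 * enough QEDR_SRQ_WQE_ELEM_SIZE elements for every WQE to carry up to
 * RDMA_MAX_SGE_PER_SRQ SGEs plus its header (the "+ 1" in the macro).
 */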
1488 
1489 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1490 		    struct ib_udata *udata)
1491 {
1492 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1493 	struct qed_rdma_create_srq_in_params in_params = {};
1494 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1495 	struct qed_rdma_create_srq_out_params out_params;
1496 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1497 	struct qedr_create_srq_ureq ureq = {};
1498 	u64 pbl_base_addr, phy_prod_pair_addr;
1499 	struct qedr_srq_hwq_info *hw_srq;
1500 	u32 page_cnt, page_size;
1501 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1502 	int rc = 0;
1503 
1504 	DP_DEBUG(dev, QEDR_MSG_QP,
1505 		 "create SRQ called from %s (pd %p)\n",
1506 		 (udata) ? "User lib" : "kernel", pd);
1507 
1508 	rc = qedr_check_srq_params(dev, init_attr, udata);
1509 	if (rc)
1510 		return -EINVAL;
1511 
1512 	srq->dev = dev;
1513 	hw_srq = &srq->hw_srq;
1514 	spin_lock_init(&srq->lock);
1515 
1516 	hw_srq->max_wr = init_attr->attr.max_wr;
1517 	hw_srq->max_sges = init_attr->attr.max_sge;
1518 
1519 	if (udata) {
1520 		if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1521 							 udata->inlen))) {
1522 			DP_ERR(dev,
1523 			       "create srq: problem copying data from user space\n");
1524 			goto err0;
1525 		}
1526 
1527 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
1528 		if (rc)
1529 			goto err0;
1530 
1531 		page_cnt = srq->usrq.pbl_info.num_pbes;
1532 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1533 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1534 		page_size = PAGE_SIZE;
1535 	} else {
1536 		struct qed_chain *pbl;
1537 
1538 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1539 		if (rc)
1540 			goto err0;
1541 
1542 		pbl = &hw_srq->pbl;
1543 		page_cnt = qed_chain_get_page_cnt(pbl);
1544 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1545 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1546 		page_size = QED_CHAIN_PAGE_SIZE;
1547 	}
1548 
1549 	in_params.pd_id = pd->pd_id;
1550 	in_params.pbl_base_addr = pbl_base_addr;
1551 	in_params.prod_pair_addr = phy_prod_pair_addr;
1552 	in_params.num_pages = page_cnt;
1553 	in_params.page_size = page_size;
1554 
1555 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1556 	if (rc)
1557 		goto err1;
1558 
1559 	srq->srq_id = out_params.srq_id;
1560 
1561 	if (udata) {
1562 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1563 		if (rc)
1564 			goto err2;
1565 	}
1566 
1567 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1568 	if (rc)
1569 		goto err2;
1570 
1571 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1572 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1573 	return 0;
1574 
1575 err2:
1576 	destroy_in_params.srq_id = srq->srq_id;
1577 
1578 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1579 err1:
1580 	if (udata)
1581 		qedr_free_srq_user_params(srq);
1582 	else
1583 		qedr_free_srq_kernel_params(srq);
1584 err0:
1585 	return -EFAULT;
1586 }
1587 
1588 void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1589 {
1590 	struct qed_rdma_destroy_srq_in_params in_params = {};
1591 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1592 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1593 
1594 	xa_erase_irq(&dev->srqs, srq->srq_id);
1595 	in_params.srq_id = srq->srq_id;
1596 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1597 
1598 	if (ibsrq->uobject)
1599 		qedr_free_srq_user_params(srq);
1600 	else
1601 		qedr_free_srq_kernel_params(srq);
1602 
1603 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1604 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1605 		 srq->srq_id);
1606 }
1607 
1608 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1609 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1610 {
1611 	struct qed_rdma_modify_srq_in_params in_params = {};
1612 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1613 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1614 	int rc;
1615 
1616 	if (attr_mask & IB_SRQ_MAX_WR) {
1617 		DP_ERR(dev,
1618 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1619 		       attr_mask, srq);
1620 		return -EINVAL;
1621 	}
1622 
1623 	if (attr_mask & IB_SRQ_LIMIT) {
1624 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1625 			DP_ERR(dev,
1626 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1627 			       attr->srq_limit, srq->hw_srq.max_wr);
1628 			return -EINVAL;
1629 		}
1630 
1631 		in_params.srq_id = srq->srq_id;
1632 		in_params.wqe_limit = attr->srq_limit;
1633 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1634 		if (rc)
1635 			return rc;
1636 	}
1637 
1638 	srq->srq_limit = attr->srq_limit;
1639 
1640 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1641 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1642 
1643 	return 0;
1644 }
1645 
1646 static inline void
1647 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1648 			      struct qedr_pd *pd,
1649 			      struct qedr_qp *qp,
1650 			      struct ib_qp_init_attr *attrs,
1651 			      bool fmr_and_reserved_lkey,
1652 			      struct qed_rdma_create_qp_in_params *params)
1653 {
1654 	/* QP handle to be written in an async event */
1655 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1656 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1657 
1658 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1659 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1660 	params->pd = pd->pd_id;
1661 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1662 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1663 	params->stats_queue = 0;
1664 	params->srq_id = 0;
1665 	params->use_srq = false;
1666 
1667 	if (!qp->srq) {
1668 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1669 
1670 	} else {
1671 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1672 		params->srq_id = qp->srq->srq_id;
1673 		params->use_srq = true;
1674 	}
1675 }
1676 
1677 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1678 {
1679 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1680 		 "qp=%p. "
1681 		 "sq_addr=0x%llx, "
1682 		 "sq_len=%zd, "
1683 		 "rq_addr=0x%llx, "
1684 		 "rq_len=%zd"
1685 		 "\n",
1686 		 qp,
1687 		 qp->usq.buf_addr,
1688 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1689 }
1690 
1691 static inline void
1692 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1693 			    struct qedr_qp *qp,
1694 			    struct qed_rdma_create_qp_out_params *out_params)
1695 {
1696 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1697 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1698 
1699 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1700 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1701 	if (!qp->srq) {
1702 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1703 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1704 	}
1705 
1706 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1707 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1708 }
1709 
1710 static void qedr_cleanup_user(struct qedr_dev *dev,
1711 			      struct qedr_ucontext *ctx,
1712 			      struct qedr_qp *qp)
1713 {
1714 	ib_umem_release(qp->usq.umem);
1715 	qp->usq.umem = NULL;
1716 
1717 	ib_umem_release(qp->urq.umem);
1718 	qp->urq.umem = NULL;
1719 
1720 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
1721 		qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1722 		qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1723 	} else {
1724 		kfree(qp->usq.pbl_tbl);
1725 		kfree(qp->urq.pbl_tbl);
1726 	}
1727 
1728 	if (qp->usq.db_rec_data) {
1729 		qedr_db_recovery_del(dev, qp->usq.db_addr,
1730 				     &qp->usq.db_rec_data->db_data);
1731 		rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1732 	}
1733 
1734 	if (qp->urq.db_rec_data) {
1735 		qedr_db_recovery_del(dev, qp->urq.db_addr,
1736 				     &qp->urq.db_rec_data->db_data);
1737 		rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
1738 	}
1739 
1740 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1741 		qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
1742 				     &qp->urq.db_rec_db2_data);
1743 }
1744 
1745 static int qedr_create_user_qp(struct qedr_dev *dev,
1746 			       struct qedr_qp *qp,
1747 			       struct ib_pd *ibpd,
1748 			       struct ib_udata *udata,
1749 			       struct ib_qp_init_attr *attrs)
1750 {
1751 	struct qed_rdma_create_qp_in_params in_params;
1752 	struct qed_rdma_create_qp_out_params out_params;
1753 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1754 	struct qedr_create_qp_uresp uresp;
1755 	struct qedr_ucontext *ctx = NULL;
1756 	struct qedr_create_qp_ureq ureq;
1757 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1758 	int rc = -EINVAL;
1759 
1760 	qp->create_type = QEDR_QP_CREATE_USER;
1761 	memset(&ureq, 0, sizeof(ureq));
1762 	rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
1763 	if (rc) {
1764 		DP_ERR(dev, "Problem copying data from user space\n");
1765 		return rc;
1766 	}
1767 
1768 	/* SQ - read access only (0) */
1769 	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1770 				  ureq.sq_len, true, 0, alloc_and_init);
1771 	if (rc)
1772 		return rc;
1773 
1774 	if (!qp->srq) {
1775 		/* RQ - read access only (0) */
1776 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1777 					  ureq.rq_len, true, 0, alloc_and_init);
1778 		if (rc)
1779 			return rc;
1780 	}
1781 
1782 	memset(&in_params, 0, sizeof(in_params));
1783 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1784 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1785 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1786 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1787 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1788 	if (!qp->srq) {
1789 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1790 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1791 	}
1792 
1793 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1794 					      &in_params, &out_params);
1795 
1796 	if (!qp->qed_qp) {
1797 		rc = -ENOMEM;
1798 		goto err1;
1799 	}
1800 
1801 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1802 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1803 
1804 	qp->qp_id = out_params.qp_id;
1805 	qp->icid = out_params.icid;
1806 
1807 	rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1808 	if (rc)
1809 		goto err;
1810 
1811 	/* db offsets were calculated in copy_qp_uresp, now set them in the user queues */
1812 	ctx = pd->uctx;
1813 	qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1814 	qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1815 
1816 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1817 		qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
1818 
1819 		/* Calculate the db_rec_db2 data here since it is constant, so
1820 		 * there is no need to reflect it from user space.
1821 		 */
1822 		qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
1823 		qp->urq.db_rec_db2_data.data.value =
1824 			cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
1825 	}
1826 
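	/* Register the user SQ/RQ doorbell addresses (and, for iWARP, the
	 * second RQ doorbell) with the driver's doorbell recovery mechanism;
	 * qedr_cleanup_user() removes these entries again.
	 */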
1827 	rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1828 				  &qp->usq.db_rec_data->db_data,
1829 				  DB_REC_WIDTH_32B,
1830 				  DB_REC_USER);
1831 	if (rc)
1832 		goto err;
1833 
1834 	rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1835 				  &qp->urq.db_rec_data->db_data,
1836 				  DB_REC_WIDTH_32B,
1837 				  DB_REC_USER);
1838 	if (rc)
1839 		goto err;
1840 
1841 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1842 		rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1843 					  &qp->urq.db_rec_db2_data,
1844 					  DB_REC_WIDTH_32B,
1845 					  DB_REC_USER);
1846 		if (rc)
1847 			goto err;
1848 	}
1849 	qedr_qp_user_print(dev, qp);
1850 
1851 	return rc;
1852 err:
1853 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1854 	if (rc)
1855 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1856 
1857 err1:
1858 	qedr_cleanup_user(dev, ctx, qp);
1859 	return rc;
1860 }
1861 
1862 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1863 {
1864 	int rc;
1865 
1866 	qp->sq.db = dev->db_addr +
1867 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1868 	qp->sq.db_data.data.icid = qp->icid;
1869 
1870 	rc = qedr_db_recovery_add(dev, qp->sq.db,
1871 				  &qp->sq.db_data,
1872 				  DB_REC_WIDTH_32B,
1873 				  DB_REC_KERNEL);
1874 	if (rc)
1875 		return rc;
1876 
1877 	qp->rq.db = dev->db_addr +
1878 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1879 	qp->rq.db_data.data.icid = qp->icid;
1880 	qp->rq.iwarp_db2 = dev->db_addr +
1881 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1882 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1883 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1884 
1885 	rc = qedr_db_recovery_add(dev, qp->rq.db,
1886 				  &qp->rq.db_data,
1887 				  DB_REC_WIDTH_32B,
1888 				  DB_REC_KERNEL);
1889 	if (rc)
1890 		return rc;
1891 
1892 	rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
1893 				  &qp->rq.iwarp_db2_data,
1894 				  DB_REC_WIDTH_32B,
1895 				  DB_REC_KERNEL);
1896 	return rc;
1897 }
1898 
1899 static int
1900 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1901 			   struct qedr_qp *qp,
1902 			   struct qed_rdma_create_qp_in_params *in_params,
1903 			   u32 n_sq_elems, u32 n_rq_elems)
1904 {
1905 	struct qed_rdma_create_qp_out_params out_params;
1906 	int rc;
1907 
1908 	rc = dev->ops->common->chain_alloc(dev->cdev,
1909 					   QED_CHAIN_USE_TO_PRODUCE,
1910 					   QED_CHAIN_MODE_PBL,
1911 					   QED_CHAIN_CNT_TYPE_U32,
1912 					   n_sq_elems,
1913 					   QEDR_SQE_ELEMENT_SIZE,
1914 					   &qp->sq.pbl, NULL);
1915 
1916 	if (rc)
1917 		return rc;
1918 
1919 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1920 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1921 
1922 	rc = dev->ops->common->chain_alloc(dev->cdev,
1923 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1924 					   QED_CHAIN_MODE_PBL,
1925 					   QED_CHAIN_CNT_TYPE_U32,
1926 					   n_rq_elems,
1927 					   QEDR_RQE_ELEMENT_SIZE,
1928 					   &qp->rq.pbl, NULL);
1929 	if (rc)
1930 		return rc;
1931 
1932 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1933 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1934 
1935 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1936 					      in_params, &out_params);
1937 
1938 	if (!qp->qed_qp)
1939 		return -EINVAL;
1940 
1941 	qp->qp_id = out_params.qp_id;
1942 	qp->icid = out_params.icid;
1943 
1944 	return qedr_set_roce_db_info(dev, qp);
1945 }
1946 
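/* Note: unlike the RoCE flow above, which allocates its own chains and passes
 * their PBL addresses into rdma_create_qp(), the iWARP flow creates the qed QP
 * first and then builds the chains on top of the PBL memory the FW returned in
 * out_params (handed to chain_alloc() via ext_pbl).
 */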
1947 static int
1948 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1949 			    struct qedr_qp *qp,
1950 			    struct qed_rdma_create_qp_in_params *in_params,
1951 			    u32 n_sq_elems, u32 n_rq_elems)
1952 {
1953 	struct qed_rdma_create_qp_out_params out_params;
1954 	struct qed_chain_ext_pbl ext_pbl;
1955 	int rc;
1956 
1957 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1958 						     QEDR_SQE_ELEMENT_SIZE,
1959 						     QED_CHAIN_MODE_PBL);
1960 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1961 						     QEDR_RQE_ELEMENT_SIZE,
1962 						     QED_CHAIN_MODE_PBL);
1963 
1964 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1965 					      in_params, &out_params);
1966 
1967 	if (!qp->qed_qp)
1968 		return -EINVAL;
1969 
1970 	/* Now we allocate the chain */
1971 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1972 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1973 
1974 	rc = dev->ops->common->chain_alloc(dev->cdev,
1975 					   QED_CHAIN_USE_TO_PRODUCE,
1976 					   QED_CHAIN_MODE_PBL,
1977 					   QED_CHAIN_CNT_TYPE_U32,
1978 					   n_sq_elems,
1979 					   QEDR_SQE_ELEMENT_SIZE,
1980 					   &qp->sq.pbl, &ext_pbl);
1981 
1982 	if (rc)
1983 		goto err;
1984 
1985 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1986 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1987 
1988 	rc = dev->ops->common->chain_alloc(dev->cdev,
1989 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1990 					   QED_CHAIN_MODE_PBL,
1991 					   QED_CHAIN_CNT_TYPE_U32,
1992 					   n_rq_elems,
1993 					   QEDR_RQE_ELEMENT_SIZE,
1994 					   &qp->rq.pbl, &ext_pbl);
1995 
1996 	if (rc)
1997 		goto err;
1998 
1999 	qp->qp_id = out_params.qp_id;
2000 	qp->icid = out_params.icid;
2001 
2002 	return qedr_set_iwarp_db_info(dev, qp);
2003 
2004 err:
2005 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2006 
2007 	return rc;
2008 }
2009 
2010 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2011 {
2012 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2013 	kfree(qp->wqe_wr_id);
2014 
2015 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2016 	kfree(qp->rqe_wr_id);
2017 
2018 	/* The GSI QP is not registered to the db mechanism, so there is nothing to delete */
2019 	if (qp->qp_type == IB_QPT_GSI)
2020 		return;
2021 
2022 	qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2023 
2024 	if (!qp->srq) {
2025 		qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2026 
2027 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2028 			qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2029 					     &qp->rq.iwarp_db2_data);
2030 	}
2031 }
2032 
2033 static int qedr_create_kernel_qp(struct qedr_dev *dev,
2034 				 struct qedr_qp *qp,
2035 				 struct ib_pd *ibpd,
2036 				 struct ib_qp_init_attr *attrs)
2037 {
2038 	struct qed_rdma_create_qp_in_params in_params;
2039 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2040 	int rc = -EINVAL;
2041 	u32 n_rq_elems;
2042 	u32 n_sq_elems;
2043 	u32 n_sq_entries;
2044 
2045 	memset(&in_params, 0, sizeof(in_params));
2046 	qp->create_type = QEDR_QP_CREATE_KERNEL;
2047 
2048 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2049 	 * the ring. The ring should allow at least a single WR, even if the
2050 	 * user requested none, due to allocation issues.
2051 	 * We should add an extra WR since the prod and cons indices of
2052 	 * wqe_wr_id are managed in such a way that the WQ is considered full
2053 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
2054 	 * double the number of entries due to an iSER issue that pushes far
2055 	 * more WRs than indicated. If we decline its ib_post_send() then we
2056 	 * get error prints in dmesg that we'd like to avoid.
2057 	 */
2058 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2059 			      dev->attr.max_sqe);
2060 
2061 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2062 				GFP_KERNEL);
2063 	if (!qp->wqe_wr_id) {
2064 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2065 		return -ENOMEM;
2066 	}
2067 
2068 	/* QP handle to be written in CQE */
2069 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2070 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2071 
2072 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2073 	 * the ring. The ring should allow at least a single WR, even if the
2074 	 * user requested none, due to allocation issues.
2075 	 */
2076 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2077 
2078 	/* Allocate driver internal RQ array */
2079 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2080 				GFP_KERNEL);
2081 	if (!qp->rqe_wr_id) {
2082 		DP_ERR(dev,
2083 		       "create qp: failed RQ shadow memory allocation\n");
2084 		kfree(qp->wqe_wr_id);
2085 		return -ENOMEM;
2086 	}
2087 
2088 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
2089 
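	/* Size the HW chains in elements rather than WRs: each SQ/RQ WR may
	 * consume up to QEDR_MAX_SQE_ELEMENTS_PER_SQE /
	 * QEDR_MAX_RQE_ELEMENTS_PER_RQE chain elements respectively.
	 */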
2090 	n_sq_entries = attrs->cap.max_send_wr;
2091 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2092 	n_sq_entries = max_t(u32, n_sq_entries, 1);
2093 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2094 
2095 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2096 
2097 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2098 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2099 						 n_sq_elems, n_rq_elems);
2100 	else
2101 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2102 						n_sq_elems, n_rq_elems);
2103 	if (rc)
2104 		qedr_cleanup_kernel(dev, qp);
2105 
2106 	return rc;
2107 }
2108 
2109 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
2110 			     struct ib_qp_init_attr *attrs,
2111 			     struct ib_udata *udata)
2112 {
2113 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2114 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2115 	struct qedr_qp *qp;
2116 	struct ib_qp *ibqp;
2117 	int rc = 0;
2118 
2119 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2120 		 udata ? "user library" : "kernel", pd);
2121 
2122 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2123 	if (rc)
2124 		return ERR_PTR(rc);
2125 
2126 	DP_DEBUG(dev, QEDR_MSG_QP,
2127 		 "create qp: called from %s, event_handler=%p, pd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2128 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
2129 		 get_qedr_cq(attrs->send_cq),
2130 		 get_qedr_cq(attrs->send_cq)->icid,
2131 		 get_qedr_cq(attrs->recv_cq),
2132 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2133 
2134 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2135 	if (!qp) {
2136 		DP_ERR(dev, "create qp: failed allocating memory\n");
2137 		return ERR_PTR(-ENOMEM);
2138 	}
2139 
2140 	qedr_set_common_qp_params(dev, qp, pd, attrs);
2141 
2142 	if (attrs->qp_type == IB_QPT_GSI) {
2143 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
2144 		if (IS_ERR(ibqp))
2145 			kfree(qp);
2146 		return ibqp;
2147 	}
2148 
2149 	if (udata)
2150 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2151 	else
2152 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2153 
2154 	if (rc)
2155 		goto err;
2156 
2157 	qp->ibqp.qp_num = qp->qp_id;
2158 
2159 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2160 		rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2161 		if (rc)
2162 			goto err;
2163 	}
2164 
2165 	return &qp->ibqp;
2166 
2167 err:
2168 	kfree(qp);
2169 
2170 	return ERR_PTR(-EFAULT);
2171 }
2172 
2173 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2174 {
2175 	switch (qp_state) {
2176 	case QED_ROCE_QP_STATE_RESET:
2177 		return IB_QPS_RESET;
2178 	case QED_ROCE_QP_STATE_INIT:
2179 		return IB_QPS_INIT;
2180 	case QED_ROCE_QP_STATE_RTR:
2181 		return IB_QPS_RTR;
2182 	case QED_ROCE_QP_STATE_RTS:
2183 		return IB_QPS_RTS;
2184 	case QED_ROCE_QP_STATE_SQD:
2185 		return IB_QPS_SQD;
2186 	case QED_ROCE_QP_STATE_ERR:
2187 		return IB_QPS_ERR;
2188 	case QED_ROCE_QP_STATE_SQE:
2189 		return IB_QPS_SQE;
2190 	}
2191 	return IB_QPS_ERR;
2192 }
2193 
2194 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2195 					enum ib_qp_state qp_state)
2196 {
2197 	switch (qp_state) {
2198 	case IB_QPS_RESET:
2199 		return QED_ROCE_QP_STATE_RESET;
2200 	case IB_QPS_INIT:
2201 		return QED_ROCE_QP_STATE_INIT;
2202 	case IB_QPS_RTR:
2203 		return QED_ROCE_QP_STATE_RTR;
2204 	case IB_QPS_RTS:
2205 		return QED_ROCE_QP_STATE_RTS;
2206 	case IB_QPS_SQD:
2207 		return QED_ROCE_QP_STATE_SQD;
2208 	case IB_QPS_ERR:
2209 		return QED_ROCE_QP_STATE_ERR;
2210 	default:
2211 		return QED_ROCE_QP_STATE_ERR;
2212 	}
2213 }
2214 
2215 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2216 {
2217 	qed_chain_reset(&qph->pbl);
2218 	qph->prod = 0;
2219 	qph->cons = 0;
2220 	qph->wqe_cons = 0;
2221 	qph->db_data.data.value = cpu_to_le16(0);
2222 }
2223 
2224 static int qedr_update_qp_state(struct qedr_dev *dev,
2225 				struct qedr_qp *qp,
2226 				enum qed_roce_qp_state cur_state,
2227 				enum qed_roce_qp_state new_state)
2228 {
2229 	int status = 0;
2230 
2231 	if (new_state == cur_state)
2232 		return 0;
2233 
2234 	switch (cur_state) {
2235 	case QED_ROCE_QP_STATE_RESET:
2236 		switch (new_state) {
2237 		case QED_ROCE_QP_STATE_INIT:
2238 			qp->prev_wqe_size = 0;
2239 			qedr_reset_qp_hwq_info(&qp->sq);
2240 			qedr_reset_qp_hwq_info(&qp->rq);
2241 			break;
2242 		default:
2243 			status = -EINVAL;
2244 			break;
2245 		}
2246 		break;
2247 	case QED_ROCE_QP_STATE_INIT:
2248 		switch (new_state) {
2249 		case QED_ROCE_QP_STATE_RTR:
2250 			/* Update doorbell (in case post_recv was
2251 			 * done before move to RTR)
2252 			 */
2253 
2254 			if (rdma_protocol_roce(&dev->ibdev, 1))
2255 				writel(qp->rq.db_data.raw, qp->rq.db);
2257 			break;
2258 		case QED_ROCE_QP_STATE_ERR:
2259 			break;
2260 		default:
2261 			/* Invalid state change. */
2262 			status = -EINVAL;
2263 			break;
2264 		}
2265 		break;
2266 	case QED_ROCE_QP_STATE_RTR:
2267 		/* RTR->XXX */
2268 		switch (new_state) {
2269 		case QED_ROCE_QP_STATE_RTS:
2270 			break;
2271 		case QED_ROCE_QP_STATE_ERR:
2272 			break;
2273 		default:
2274 			/* Invalid state change. */
2275 			status = -EINVAL;
2276 			break;
2277 		}
2278 		break;
2279 	case QED_ROCE_QP_STATE_RTS:
2280 		/* RTS->XXX */
2281 		switch (new_state) {
2282 		case QED_ROCE_QP_STATE_SQD:
2283 			break;
2284 		case QED_ROCE_QP_STATE_ERR:
2285 			break;
2286 		default:
2287 			/* Invalid state change. */
2288 			status = -EINVAL;
2289 			break;
2290 		}
2291 		break;
2292 	case QED_ROCE_QP_STATE_SQD:
2293 		/* SQD->XXX */
2294 		switch (new_state) {
2295 		case QED_ROCE_QP_STATE_RTS:
2296 		case QED_ROCE_QP_STATE_ERR:
2297 			break;
2298 		default:
2299 			/* Invalid state change. */
2300 			status = -EINVAL;
2301 			break;
2302 		}
2303 		break;
2304 	case QED_ROCE_QP_STATE_ERR:
2305 		/* ERR->XXX */
2306 		switch (new_state) {
2307 		case QED_ROCE_QP_STATE_RESET:
2308 			if ((qp->rq.prod != qp->rq.cons) ||
2309 			    (qp->sq.prod != qp->sq.cons)) {
2310 				DP_NOTICE(dev,
2311 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2312 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2313 					  qp->sq.cons);
2314 				status = -EINVAL;
2315 			}
2316 			break;
2317 		default:
2318 			status = -EINVAL;
2319 			break;
2320 		}
2321 		break;
2322 	default:
2323 		status = -EINVAL;
2324 		break;
2325 	}
2326 
2327 	return status;
2328 }
2329 
2330 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2331 		   int attr_mask, struct ib_udata *udata)
2332 {
2333 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2334 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2335 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2336 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2337 	enum ib_qp_state old_qp_state, new_qp_state;
2338 	enum qed_roce_qp_state cur_state;
2339 	int rc = 0;
2340 
2341 	DP_DEBUG(dev, QEDR_MSG_QP,
2342 		 "modify qp: qp %p attr_mask=0x%x, state=%d\n", qp, attr_mask,
2343 		 attr->qp_state);
2344 
2345 	old_qp_state = qedr_get_ibqp_state(qp->state);
2346 	if (attr_mask & IB_QP_STATE)
2347 		new_qp_state = attr->qp_state;
2348 	else
2349 		new_qp_state = old_qp_state;
2350 
2351 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2352 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2353 					ibqp->qp_type, attr_mask)) {
2354 			DP_ERR(dev,
2355 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2356 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2357 			       attr_mask, qp->qp_id, ibqp->qp_type,
2358 			       old_qp_state, new_qp_state);
2359 			rc = -EINVAL;
2360 			goto err;
2361 		}
2362 	}
2363 
2364 	/* Translate the masks... */
2365 	if (attr_mask & IB_QP_STATE) {
2366 		SET_FIELD(qp_params.modify_flags,
2367 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2368 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2369 	}
2370 
2371 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2372 		qp_params.sqd_async = true;
2373 
2374 	if (attr_mask & IB_QP_PKEY_INDEX) {
2375 		SET_FIELD(qp_params.modify_flags,
2376 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2377 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2378 			rc = -EINVAL;
2379 			goto err;
2380 		}
2381 
2382 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2383 	}
2384 
2385 	if (attr_mask & IB_QP_QKEY)
2386 		qp->qkey = attr->qkey;
2387 
2388 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2389 		SET_FIELD(qp_params.modify_flags,
2390 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2391 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2392 						  IB_ACCESS_REMOTE_READ;
2393 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2394 						   IB_ACCESS_REMOTE_WRITE;
2395 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2396 					       IB_ACCESS_REMOTE_ATOMIC;
2397 	}
2398 
2399 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2400 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2401 			return -EINVAL;
2402 
2403 		if (attr_mask & IB_QP_PATH_MTU) {
2404 			if (attr->path_mtu < IB_MTU_256 ||
2405 			    attr->path_mtu > IB_MTU_4096) {
2406 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2407 				rc = -EINVAL;
2408 				goto err;
2409 			}
2410 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2411 				      ib_mtu_enum_to_int(iboe_get_mtu
2412 							 (dev->ndev->mtu)));
2413 		}
2414 
2415 		if (!qp->mtu) {
2416 			qp->mtu =
2417 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2418 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2419 		}
2420 
2421 		SET_FIELD(qp_params.modify_flags,
2422 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2423 
2424 		qp_params.traffic_class_tos = grh->traffic_class;
2425 		qp_params.flow_label = grh->flow_label;
2426 		qp_params.hop_limit_ttl = grh->hop_limit;
2427 
2428 		qp->sgid_idx = grh->sgid_index;
2429 
2430 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2431 		if (rc) {
2432 			DP_ERR(dev,
2433 			       "modify qp: problems with GID index %d (rc=%d)\n",
2434 			       grh->sgid_index, rc);
2435 			return rc;
2436 		}
2437 
2438 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2439 				   qp_params.remote_mac_addr);
2440 		if (rc)
2441 			return rc;
2442 
2443 		qp_params.use_local_mac = true;
2444 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2445 
2446 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2447 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2448 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2449 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2450 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2451 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2452 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2453 			 qp_params.remote_mac_addr);
2454 
2455 		qp_params.mtu = qp->mtu;
2456 		qp_params.lb_indication = false;
2457 	}
2458 
2459 	if (!qp_params.mtu) {
2460 		/* Stay with current MTU */
2461 		if (qp->mtu)
2462 			qp_params.mtu = qp->mtu;
2463 		else
2464 			qp_params.mtu =
2465 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2466 	}
2467 
2468 	if (attr_mask & IB_QP_TIMEOUT) {
2469 		SET_FIELD(qp_params.modify_flags,
2470 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2471 
2472 		/* The received timeout value is an exponent used like this:
2473 		 *    "12.7.34 LOCAL ACK TIMEOUT
2474 		 *    Value representing the transport (ACK) timeout for use by
2475 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2476 		 * The FW expects timeout in msec so we need to divide the usec
2477 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2478 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2479 		 * The value of zero means infinite so we use a 'max_t' to make
2480 		 * sure that sub 1 msec values will be configured as 1 msec.
2481 		 */
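		/* Worked example (illustrative): attr->timeout = 14 means
		 * 4.096us * 2^14 ~= 67msec on the wire; the value programmed
		 * below is 2^(14 - 8) = 64msec, a close approximation.
		 */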
2482 		if (attr->timeout)
2483 			qp_params.ack_timeout =
2484 					1 << max_t(int, attr->timeout - 8, 0);
2485 		else
2486 			qp_params.ack_timeout = 0;
2487 	}
2488 
2489 	if (attr_mask & IB_QP_RETRY_CNT) {
2490 		SET_FIELD(qp_params.modify_flags,
2491 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2492 		qp_params.retry_cnt = attr->retry_cnt;
2493 	}
2494 
2495 	if (attr_mask & IB_QP_RNR_RETRY) {
2496 		SET_FIELD(qp_params.modify_flags,
2497 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2498 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2499 	}
2500 
2501 	if (attr_mask & IB_QP_RQ_PSN) {
2502 		SET_FIELD(qp_params.modify_flags,
2503 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2504 		qp_params.rq_psn = attr->rq_psn;
2505 		qp->rq_psn = attr->rq_psn;
2506 	}
2507 
2508 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2509 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2510 			rc = -EINVAL;
2511 			DP_ERR(dev,
2512 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2513 			       attr->max_rd_atomic,
2514 			       dev->attr.max_qp_req_rd_atomic_resc);
2515 			goto err;
2516 		}
2517 
2518 		SET_FIELD(qp_params.modify_flags,
2519 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2520 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2521 	}
2522 
2523 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2524 		SET_FIELD(qp_params.modify_flags,
2525 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2526 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2527 	}
2528 
2529 	if (attr_mask & IB_QP_SQ_PSN) {
2530 		SET_FIELD(qp_params.modify_flags,
2531 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2532 		qp_params.sq_psn = attr->sq_psn;
2533 		qp->sq_psn = attr->sq_psn;
2534 	}
2535 
2536 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2537 		if (attr->max_dest_rd_atomic >
2538 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2539 			DP_ERR(dev,
2540 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2541 			       attr->max_dest_rd_atomic,
2542 			       dev->attr.max_qp_resp_rd_atomic_resc);
2543 
2544 			rc = -EINVAL;
2545 			goto err;
2546 		}
2547 
2548 		SET_FIELD(qp_params.modify_flags,
2549 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2550 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2551 	}
2552 
2553 	if (attr_mask & IB_QP_DEST_QPN) {
2554 		SET_FIELD(qp_params.modify_flags,
2555 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2556 
2557 		qp_params.dest_qp = attr->dest_qp_num;
2558 		qp->dest_qp_num = attr->dest_qp_num;
2559 	}
2560 
2561 	cur_state = qp->state;
2562 
2563 	/* Update the QP state before the actual ramrod to prevent a race with
2564 	 * fast path. Modifying the QP state to error will cause the device to
2565 	 * flush the CQEs, and while polling, the flushed CQEs will be
2566 	 * considered a potential issue if the QP isn't in the error state.
2567 	 */
2568 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2569 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2570 		qp->state = QED_ROCE_QP_STATE_ERR;
2571 
2572 	if (qp->qp_type != IB_QPT_GSI)
2573 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2574 					      qp->qed_qp, &qp_params);
2575 
2576 	if (attr_mask & IB_QP_STATE) {
2577 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2578 			rc = qedr_update_qp_state(dev, qp, cur_state,
2579 						  qp_params.new_state);
2580 		qp->state = qp_params.new_state;
2581 	}
2582 
2583 err:
2584 	return rc;
2585 }
2586 
2587 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2588 {
2589 	int ib_qp_acc_flags = 0;
2590 
2591 	if (params->incoming_rdma_write_en)
2592 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2593 	if (params->incoming_rdma_read_en)
2594 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2595 	if (params->incoming_atomic_en)
2596 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2597 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2598 	return ib_qp_acc_flags;
2599 }
2600 
2601 int qedr_query_qp(struct ib_qp *ibqp,
2602 		  struct ib_qp_attr *qp_attr,
2603 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2604 {
2605 	struct qed_rdma_query_qp_out_params params;
2606 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2607 	struct qedr_dev *dev = qp->dev;
2608 	int rc = 0;
2609 
2610 	memset(&params, 0, sizeof(params));
2611 
2612 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2613 	if (rc)
2614 		goto err;
2615 
2616 	memset(qp_attr, 0, sizeof(*qp_attr));
2617 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2618 
2619 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2620 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2621 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2622 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2623 	qp_attr->rq_psn = params.rq_psn;
2624 	qp_attr->sq_psn = params.sq_psn;
2625 	qp_attr->dest_qp_num = params.dest_qp;
2626 
2627 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2628 
2629 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2630 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2631 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2632 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2633 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2634 	qp_init_attr->cap = qp_attr->cap;
2635 
2636 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2637 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2638 			params.flow_label, qp->sgid_idx,
2639 			params.hop_limit_ttl, params.traffic_class_tos);
2640 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2641 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2642 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2643 	qp_attr->timeout = params.timeout;
2644 	qp_attr->rnr_retry = params.rnr_retry;
2645 	qp_attr->retry_cnt = params.retry_cnt;
2646 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2647 	qp_attr->pkey_index = params.pkey_index;
2648 	qp_attr->port_num = 1;
2649 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2650 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2651 	qp_attr->alt_pkey_index = 0;
2652 	qp_attr->alt_port_num = 0;
2653 	qp_attr->alt_timeout = 0;
2654 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2655 
2656 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2657 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2658 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2659 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2660 
2661 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2662 		 qp_attr->cap.max_inline_data);
2663 
2664 err:
2665 	return rc;
2666 }
2667 
2668 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2669 				  struct ib_udata *udata)
2670 {
2671 	struct qedr_ucontext *ctx =
2672 		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2673 					  ibucontext);
2674 	int rc;
2675 
2676 	if (qp->qp_type != IB_QPT_GSI) {
2677 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2678 		if (rc)
2679 			return rc;
2680 	}
2681 
2682 	if (qp->create_type == QEDR_QP_CREATE_USER)
2683 		qedr_cleanup_user(dev, ctx, qp);
2684 	else
2685 		qedr_cleanup_kernel(dev, qp);
2686 
2687 	return 0;
2688 }
2689 
2690 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2691 {
2692 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2693 	struct qedr_dev *dev = qp->dev;
2694 	struct ib_qp_attr attr;
2695 	int attr_mask = 0;
2696 
2697 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2698 		 qp, qp->qp_type);
2699 
2700 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2701 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2702 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2703 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2704 
2705 			attr.qp_state = IB_QPS_ERR;
2706 			attr_mask |= IB_QP_STATE;
2707 
2708 			/* Change the QP state to ERROR */
2709 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2710 		}
2711 	} else {
2712 		/* If connection establishment has started, the WAIT_FOR_CONNECT
2713 		 * bit will be on and we need to wait for the establishment
2714 		 * to complete before destroying the qp.
2715 		 */
2716 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2717 				     &qp->iwarp_cm_flags))
2718 			wait_for_completion(&qp->iwarp_cm_comp);
2719 
2720 		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2721 		 * bit will be on, and we need to wait for the disconnect to
2722 		 * complete before continuing. We can use the same completion,
2723 		 * iwarp_cm_comp, since this is the only place that waits for
2724 		 * this completion and it is sequential. In addition,
2725 		 * disconnect can't occur before the connection is fully
2726 		 * established, therefore if WAIT_FOR_DISCONNECT is on it
2727 		 * means WAIT_FOR_CONNECT is also on and the completion for
2728 		 * CONNECT already occurred.
2729 		 */
2730 		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2731 				     &qp->iwarp_cm_flags))
2732 			wait_for_completion(&qp->iwarp_cm_comp);
2733 	}
2734 
2735 	if (qp->qp_type == IB_QPT_GSI)
2736 		qedr_destroy_gsi_qp(dev);
2737 
2738 	/* We need to remove the entry from the xarray before we release the
2739 	 * qp_id to avoid a race of the qp_id being reallocated and failing
2740 	 * on xa_insert
2741 	 */
2742 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2743 		xa_erase(&dev->qps, qp->qp_id);
2744 
2745 	qedr_free_qp_resources(dev, qp, udata);
2746 
2747 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
2748 		qedr_iw_qp_rem_ref(&qp->ibqp);
2749 
2750 	return 0;
2751 }
2752 
2753 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
2754 		   struct ib_udata *udata)
2755 {
2756 	struct qedr_ah *ah = get_qedr_ah(ibah);
2757 
2758 	rdma_copy_ah_attr(&ah->attr, attr);
2759 
2760 	return 0;
2761 }
2762 
2763 void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2764 {
2765 	struct qedr_ah *ah = get_qedr_ah(ibah);
2766 
2767 	rdma_destroy_ah_attr(&ah->attr);
2768 }
2769 
2770 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2771 {
2772 	struct qedr_pbl *pbl, *tmp;
2773 
2774 	if (info->pbl_table)
2775 		list_add_tail(&info->pbl_table->list_entry,
2776 			      &info->free_pbl_list);
2777 
2778 	if (!list_empty(&info->inuse_pbl_list))
2779 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2780 
2781 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2782 		list_del(&pbl->list_entry);
2783 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2784 	}
2785 }
2786 
2787 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2788 			size_t page_list_len, bool two_layered)
2789 {
2790 	struct qedr_pbl *tmp;
2791 	int rc;
2792 
2793 	INIT_LIST_HEAD(&info->free_pbl_list);
2794 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2795 
2796 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2797 				  page_list_len, two_layered);
2798 	if (rc)
2799 		goto done;
2800 
2801 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2802 	if (IS_ERR(info->pbl_table)) {
2803 		rc = PTR_ERR(info->pbl_table);
2804 		goto done;
2805 	}
2806 
2807 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2808 		 &info->pbl_table->pa);
2809 
2810 	/* In the usual case we use 2 PBLs, so we add one to the free
2811 	 * list and allocate another one.
2812 	 */
2813 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2814 	if (IS_ERR(tmp)) {
2815 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2816 		goto done;
2817 	}
2818 
2819 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2820 
2821 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2822 
2823 done:
2824 	if (rc)
2825 		free_mr_info(dev, info);
2826 
2827 	return rc;
2828 }
2829 
2830 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2831 			       u64 usr_addr, int acc, struct ib_udata *udata)
2832 {
2833 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2834 	struct qedr_mr *mr;
2835 	struct qedr_pd *pd;
2836 	int rc = -ENOMEM;
2837 
2838 	pd = get_qedr_pd(ibpd);
2839 	DP_DEBUG(dev, QEDR_MSG_MR,
2840 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2841 		 pd->pd_id, start, len, usr_addr, acc);
2842 
2843 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2844 		return ERR_PTR(-EINVAL);
2845 
2846 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2847 	if (!mr)
2848 		return ERR_PTR(rc);
2849 
2850 	mr->type = QEDR_MR_USER;
2851 
2852 	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2853 	if (IS_ERR(mr->umem)) {
2854 		rc = -EFAULT;
2855 		goto err0;
2856 	}
2857 
2858 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2859 	if (rc)
2860 		goto err1;
2861 
2862 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2863 			   &mr->info.pbl_info, PAGE_SHIFT);
2864 
2865 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2866 	if (rc) {
2867 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2868 		goto err1;
2869 	}
2870 
2871 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2872 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2873 	mr->hw_mr.key = 0;
2874 	mr->hw_mr.pd = pd->pd_id;
2875 	mr->hw_mr.local_read = 1;
2876 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2877 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2878 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2879 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2880 	mr->hw_mr.mw_bind = false;
2881 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2882 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2883 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2884 	mr->hw_mr.page_size_log = PAGE_SHIFT;
2885 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2886 	mr->hw_mr.length = len;
2887 	mr->hw_mr.vaddr = usr_addr;
2888 	mr->hw_mr.zbva = false;
2889 	mr->hw_mr.phy_mr = false;
2890 	mr->hw_mr.dma_mr = false;
2891 
2892 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2893 	if (rc) {
2894 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2895 		goto err2;
2896 	}
2897 
2898 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2899 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2900 	    mr->hw_mr.remote_atomic)
2901 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2902 
2903 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2904 		 mr->ibmr.lkey);
2905 	return &mr->ibmr;
2906 
2907 err2:
2908 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2909 err1:
2910 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2911 err0:
2912 	kfree(mr);
2913 	return ERR_PTR(rc);
2914 }
2915 
2916 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2917 {
2918 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2919 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2920 	int rc = 0;
2921 
2922 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2923 	if (rc)
2924 		return rc;
2925 
2926 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2927 
2928 	if (mr->type != QEDR_MR_DMA)
2929 		free_mr_info(dev, &mr->info);
2930 
2931 	/* it could be user registered memory. */
2932 	ib_umem_release(mr->umem);
2933 
2934 	kfree(mr);
2935 
2936 	return rc;
2937 }
2938 
2939 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2940 				       int max_page_list_len)
2941 {
2942 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2943 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2944 	struct qedr_mr *mr;
2945 	int rc = -ENOMEM;
2946 
2947 	DP_DEBUG(dev, QEDR_MSG_MR,
2948 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2949 		 max_page_list_len);
2950 
2951 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2952 	if (!mr)
2953 		return ERR_PTR(rc);
2954 
2955 	mr->dev = dev;
2956 	mr->type = QEDR_MR_FRMR;
2957 
2958 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2959 	if (rc)
2960 		goto err0;
2961 
2962 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2963 	if (rc) {
2964 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2965 		goto err0;
2966 	}
2967 
2968 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2969 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2970 	mr->hw_mr.key = 0;
2971 	mr->hw_mr.pd = pd->pd_id;
2972 	mr->hw_mr.local_read = 1;
2973 	mr->hw_mr.local_write = 0;
2974 	mr->hw_mr.remote_read = 0;
2975 	mr->hw_mr.remote_write = 0;
2976 	mr->hw_mr.remote_atomic = 0;
2977 	mr->hw_mr.mw_bind = false;
2978 	mr->hw_mr.pbl_ptr = 0;
2979 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2980 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2981 	mr->hw_mr.fbo = 0;
2982 	mr->hw_mr.length = 0;
2983 	mr->hw_mr.vaddr = 0;
2984 	mr->hw_mr.zbva = false;
2985 	mr->hw_mr.phy_mr = true;
2986 	mr->hw_mr.dma_mr = false;
2987 
2988 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2989 	if (rc) {
2990 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2991 		goto err1;
2992 	}
2993 
2994 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2995 	mr->ibmr.rkey = mr->ibmr.lkey;
2996 
2997 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2998 	return mr;
2999 
3000 err1:
3001 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3002 err0:
3003 	kfree(mr);
3004 	return ERR_PTR(rc);
3005 }
3006 
3007 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3008 			    u32 max_num_sg, struct ib_udata *udata)
3009 {
3010 	struct qedr_mr *mr;
3011 
3012 	if (mr_type != IB_MR_TYPE_MEM_REG)
3013 		return ERR_PTR(-EINVAL);
3014 
3015 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
3016 
3017 	if (IS_ERR(mr))
3018 		return ERR_PTR(-EINVAL);
3019 
3020 	return &mr->ibmr;
3021 }
3022 
3023 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3024 {
3025 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3026 	struct qedr_pbl *pbl_table;
3027 	struct regpair *pbe;
3028 	u32 pbes_in_page;
3029 
3030 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3031 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
3032 		return -ENOMEM;
3033 	}
3034 
3035 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3036 		 mr->npages, addr);
3037 
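	/* Locate the PBE for page index mr->npages: each PBL page holds
	 * pbl_size / sizeof(u64) entries, so first pick the PBL page and then
	 * the entry within it, storing the DMA address as an LE regpair.
	 */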
3038 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3039 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3040 	pbe = (struct regpair *)pbl_table->va;
3041 	pbe +=  mr->npages % pbes_in_page;
3042 	pbe->lo = cpu_to_le32((u32)addr);
3043 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
3044 
3045 	mr->npages++;
3046 
3047 	return 0;
3048 }
3049 
3050 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3051 {
3052 	int work = info->completed - info->completed_handled - 1;
3053 
3054 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3055 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3056 		struct qedr_pbl *pbl;
3057 
3058 		/* Free all the page lists that can be freed (all the ones that
3059 		 * were invalidated), under the assumption that if an FMR
3060 		 * completed successfully, then any invalidate operation issued
3061 		 * before it has also completed.
3062 		 */
3063 		pbl = list_first_entry(&info->inuse_pbl_list,
3064 				       struct qedr_pbl, list_entry);
3065 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3066 		info->completed_handled++;
3067 	}
3068 }
3069 
3070 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3071 		   int sg_nents, unsigned int *sg_offset)
3072 {
3073 	struct qedr_mr *mr = get_qedr_mr(ibmr);
3074 
3075 	mr->npages = 0;
3076 
3077 	handle_completed_mrs(mr->dev, &mr->info);
3078 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
3079 }
3080 
3081 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3082 {
3083 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3084 	struct qedr_pd *pd = get_qedr_pd(ibpd);
3085 	struct qedr_mr *mr;
3086 	int rc;
3087 
3088 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3089 	if (!mr)
3090 		return ERR_PTR(-ENOMEM);
3091 
3092 	mr->type = QEDR_MR_DMA;
3093 
3094 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3095 	if (rc) {
3096 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
3097 		goto err1;
3098 	}
3099 
3100 	/* index only, 18 bit long, lkey = itid << 8 | key */
3101 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3102 	mr->hw_mr.pd = pd->pd_id;
3103 	mr->hw_mr.local_read = 1;
3104 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3105 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3106 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3107 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3108 	mr->hw_mr.dma_mr = true;
3109 
3110 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3111 	if (rc) {
3112 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3113 		goto err2;
3114 	}
3115 
3116 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3117 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3118 	    mr->hw_mr.remote_atomic)
3119 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3120 
3121 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
3122 	return &mr->ibmr;
3123 
3124 err2:
3125 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
3126 err1:
3127 	kfree(mr);
3128 	return ERR_PTR(rc);
3129 }
3130 
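/* The WQ is treated as full when advancing prod would make it equal to cons,
 * i.e. one slot is always left unused. E.g. with max_wr = 4, prod = 3 and
 * cons = 0: (3 + 1) % 4 == 0 == cons, so the WQ is reported full.
 */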
3131 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3132 {
3133 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
3134 }
3135 
3136 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3137 {
3138 	int i, len = 0;
3139 
3140 	for (i = 0; i < num_sge; i++)
3141 		len += sg_list[i].length;
3142 
3143 	return len;
3144 }
3145 
3146 static void swap_wqe_data64(u64 *p)
3147 {
3148 	int i;
3149 
3150 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3151 		*p = cpu_to_be64(cpu_to_le64(*p));
3152 }
3153 
3154 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3155 				       struct qedr_qp *qp, u8 *wqe_size,
3156 				       const struct ib_send_wr *wr,
3157 				       const struct ib_send_wr **bad_wr,
3158 				       u8 *bits, u8 bit)
3159 {
3160 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3161 	char *seg_prt, *wqe;
3162 	int i, seg_siz;
3163 
3164 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3165 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3166 		*bad_wr = wr;
3167 		return 0;
3168 	}
3169 
3170 	if (!data_size)
3171 		return data_size;
3172 
3173 	*bits |= bit;
3174 
3175 	seg_prt = NULL;
3176 	wqe = NULL;
3177 	seg_siz = 0;
3178 
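	/* Inline data is packed straight into SQ chain elements: each element
	 * produced below opens a new segment of
	 * sizeof(struct rdma_sq_common_wqe) bytes, and every fully written
	 * segment is 64-bit swapped for the HW (see swap_wqe_data64()).
	 */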
3179 	/* Copy data inline */
3180 	for (i = 0; i < wr->num_sge; i++) {
3181 		u32 len = wr->sg_list[i].length;
3182 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3183 
3184 		while (len > 0) {
3185 			u32 cur;
3186 
3187 			/* New segment required */
3188 			if (!seg_siz) {
3189 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3190 				seg_prt = wqe;
3191 				seg_siz = sizeof(struct rdma_sq_common_wqe);
3192 				(*wqe_size)++;
3193 			}
3194 
3195 			/* Calculate currently allowed length */
3196 			cur = min_t(u32, len, seg_siz);
3197 			memcpy(seg_prt, src, cur);
3198 
3199 			/* Update segment variables */
3200 			seg_prt += cur;
3201 			seg_siz -= cur;
3202 
3203 			/* Update sge variables */
3204 			src += cur;
3205 			len -= cur;
3206 
3207 			/* Swap fully-completed segments */
3208 			if (!seg_siz)
3209 				swap_wqe_data64((u64 *)wqe);
3210 		}
3211 	}
3212 
3213 	/* swap the last, partially filled segment */
3214 	if (seg_siz)
3215 		swap_wqe_data64((u64 *)wqe);
3216 
3217 	return data_size;
3218 }
3219 
3220 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
3221 	do {							\
3222 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3223 		(sge)->length = cpu_to_le32(vlength);		\
3224 		(sge)->flags = cpu_to_le32(vflags);		\
3225 	} while (0)
3226 
3227 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
3228 	do {							\
3229 		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
3230 		(hdr)->num_sges = num_sge;			\
3231 	} while (0)
3232 
3233 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
3234 	do {							\
3235 		DMA_REGPAIR_LE(sge->addr, vaddr);		\
3236 		(sge)->length = cpu_to_le32(vlength);		\
3237 		(sge)->l_key = cpu_to_le32(vlkey);		\
3238 	} while (0)
3239 
3240 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3241 				const struct ib_send_wr *wr)
3242 {
3243 	u32 data_size = 0;
3244 	int i;
3245 
3246 	for (i = 0; i < wr->num_sge; i++) {
3247 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3248 
3249 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3250 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3251 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3252 		data_size += wr->sg_list[i].length;
3253 	}
3254 
3255 	if (wqe_size)
3256 		*wqe_size += wr->num_sge;
3257 
3258 	return data_size;
3259 }
3260 
3261 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3262 				     struct qedr_qp *qp,
3263 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3264 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3265 				     const struct ib_send_wr *wr,
3266 				     const struct ib_send_wr **bad_wr)
3267 {
3268 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3269 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3270 
3271 	if (wr->send_flags & IB_SEND_INLINE &&
3272 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3273 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3274 		u8 flags = 0;
3275 
3276 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3277 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3278 						   bad_wr, &rwqe->flags, flags);
3279 	}
3280 
3281 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3282 }
3283 
3284 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3285 				     struct qedr_qp *qp,
3286 				     struct rdma_sq_send_wqe_1st *swqe,
3287 				     struct rdma_sq_send_wqe_2st *swqe2,
3288 				     const struct ib_send_wr *wr,
3289 				     const struct ib_send_wr **bad_wr)
3290 {
3291 	memset(swqe2, 0, sizeof(*swqe2));
3292 	if (wr->send_flags & IB_SEND_INLINE) {
3293 		u8 flags = 0;
3294 
3295 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3296 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3297 						   bad_wr, &swqe->flags, flags);
3298 	}
3299 
3300 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3301 }
3302 
3303 static int qedr_prepare_reg(struct qedr_qp *qp,
3304 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3305 			    const struct ib_reg_wr *wr)
3306 {
3307 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3308 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3309 
3310 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3311 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3312 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3313 	fwqe1->l_key = wr->key;
3314 
3315 	fwqe2->access_ctrl = 0;
3316 
3317 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3318 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3319 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3320 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3321 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3322 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3323 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3324 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3325 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3326 	fwqe2->fmr_ctrl = 0;
3327 
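	/* The PAGE_SIZE_LOG field appears to be encoded relative to 4K (2^12),
	 * hence the "- 12" below; a 4K MR page size is programmed as 0.
	 * (Assumption based on the encoding, not on HW documentation.)
	 */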
3328 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3329 		   ilog2(mr->ibmr.page_size) - 12);
3330 
3331 	fwqe2->length_hi = 0;
3332 	fwqe2->length_lo = mr->ibmr.length;
3333 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3334 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3335 
3336 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3337 
3338 	return 0;
3339 }
3340 
3341 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3342 {
3343 	switch (opcode) {
3344 	case IB_WR_RDMA_WRITE:
3345 	case IB_WR_RDMA_WRITE_WITH_IMM:
3346 		return IB_WC_RDMA_WRITE;
3347 	case IB_WR_SEND_WITH_IMM:
3348 	case IB_WR_SEND:
3349 	case IB_WR_SEND_WITH_INV:
3350 		return IB_WC_SEND;
3351 	case IB_WR_RDMA_READ:
3352 	case IB_WR_RDMA_READ_WITH_INV:
3353 		return IB_WC_RDMA_READ;
3354 	case IB_WR_ATOMIC_CMP_AND_SWP:
3355 		return IB_WC_COMP_SWAP;
3356 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3357 		return IB_WC_FETCH_ADD;
3358 	case IB_WR_REG_MR:
3359 		return IB_WC_REG_MR;
3360 	case IB_WR_LOCAL_INV:
3361 		return IB_WC_LOCAL_INV;
3362 	default:
3363 		return IB_WC_SEND;
3364 	}
3365 }
3366 
3367 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3368 				      const struct ib_send_wr *wr)
3369 {
3370 	int wq_is_full, err_wr, pbl_is_full;
3371 	struct qedr_dev *dev = qp->dev;
3372 
3373 	/* prevent SQ overflow and/or processing of a bad WR */
3374 	err_wr = wr->num_sge > qp->sq.max_sges;
3375 	wq_is_full = qedr_wq_is_full(&qp->sq);
3376 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3377 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3378 	if (wq_is_full || err_wr || pbl_is_full) {
3379 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3380 			DP_ERR(dev,
3381 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3382 			       qp);
3383 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3384 		}
3385 
3386 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3387 			DP_ERR(dev,
3388 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3389 			       qp);
3390 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3391 		}
3392 
3393 		if (pbl_is_full &&
3394 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3395 			DP_ERR(dev,
3396 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3397 			       qp);
3398 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3399 		}
3400 		return false;
3401 	}
3402 	return true;
3403 }
3404 
3405 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3406 			    const struct ib_send_wr **bad_wr)
3407 {
3408 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3409 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3410 	struct rdma_sq_atomic_wqe_1st *awqe1;
3411 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3412 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3413 	struct rdma_sq_send_wqe_2st *swqe2;
3414 	struct rdma_sq_local_inv_wqe *iwqe;
3415 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3416 	struct rdma_sq_send_wqe_1st *swqe;
3417 	struct rdma_sq_rdma_wqe_1st *rwqe;
3418 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3419 	struct rdma_sq_common_wqe *wqe;
3420 	u32 length;
3421 	int rc = 0;
3422 	bool comp;
3423 
3424 	if (!qedr_can_post_send(qp, wr)) {
3425 		*bad_wr = wr;
3426 		return -ENOMEM;
3427 	}
3428 
3429 	wqe = qed_chain_produce(&qp->sq.pbl);
3430 	qp->wqe_wr_id[qp->sq.prod].signaled =
3431 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3432 
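	/* A completion is requested either per-WR (IB_SEND_SIGNALED) or,
	 * presumably, for every WR when qp->signaled is set (all-signaled QP).
	 */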
3433 	wqe->flags = 0;
3434 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3435 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3436 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3437 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3438 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3439 		   !!(wr->send_flags & IB_SEND_FENCE));
3440 	wqe->prev_wqe_size = qp->prev_wqe_size;
3441 
3442 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3443 
3444 	switch (wr->opcode) {
3445 	case IB_WR_SEND_WITH_IMM:
3446 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3447 			rc = -EINVAL;
3448 			*bad_wr = wr;
3449 			break;
3450 		}
3451 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3452 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3453 		swqe->wqe_size = 2;
3454 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3455 
3456 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3457 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3458 						   wr, bad_wr);
3459 		swqe->length = cpu_to_le32(length);
3460 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3461 		qp->prev_wqe_size = swqe->wqe_size;
3462 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3463 		break;
3464 	case IB_WR_SEND:
3465 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3466 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3467 
3468 		swqe->wqe_size = 2;
3469 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3470 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3471 						   wr, bad_wr);
3472 		swqe->length = cpu_to_le32(length);
3473 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3474 		qp->prev_wqe_size = swqe->wqe_size;
3475 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3476 		break;
3477 	case IB_WR_SEND_WITH_INV:
3478 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3479 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3480 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3481 		swqe->wqe_size = 2;
3482 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3483 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3484 						   wr, bad_wr);
3485 		swqe->length = cpu_to_le32(length);
3486 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3487 		qp->prev_wqe_size = swqe->wqe_size;
3488 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3489 		break;
3490 
3491 	case IB_WR_RDMA_WRITE_WITH_IMM:
3492 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3493 			rc = -EINVAL;
3494 			*bad_wr = wr;
3495 			break;
3496 		}
3497 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3498 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3499 
3500 		rwqe->wqe_size = 2;
3501 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3502 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3503 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3504 						   wr, bad_wr);
3505 		rwqe->length = cpu_to_le32(length);
3506 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3507 		qp->prev_wqe_size = rwqe->wqe_size;
3508 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3509 		break;
3510 	case IB_WR_RDMA_WRITE:
3511 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3512 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3513 
3514 		rwqe->wqe_size = 2;
3515 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3516 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3517 						   wr, bad_wr);
3518 		rwqe->length = cpu_to_le32(length);
3519 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3520 		qp->prev_wqe_size = rwqe->wqe_size;
3521 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3522 		break;
3523 	case IB_WR_RDMA_READ_WITH_INV:
3524 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3525 		/* fallthrough -- handled identically to RDMA READ */
3526 
3527 	case IB_WR_RDMA_READ:
3528 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3529 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3530 
3531 		rwqe->wqe_size = 2;
3532 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3533 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3534 						   wr, bad_wr);
3535 		rwqe->length = cpu_to_le32(length);
3536 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3537 		qp->prev_wqe_size = rwqe->wqe_size;
3538 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3539 		break;
3540 
3541 	case IB_WR_ATOMIC_CMP_AND_SWP:
3542 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3543 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3544 		awqe1->wqe_size = 4;
3545 
3546 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3547 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3548 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3549 
3550 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3551 
3552 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3553 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3554 			DMA_REGPAIR_LE(awqe3->swap_data,
3555 				       atomic_wr(wr)->compare_add);
3556 		} else {
3557 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3558 			DMA_REGPAIR_LE(awqe3->swap_data,
3559 				       atomic_wr(wr)->swap);
3560 			DMA_REGPAIR_LE(awqe3->cmp_data,
3561 				       atomic_wr(wr)->compare_add);
3562 		}
3563 
3564 		qedr_prepare_sq_sges(qp, NULL, wr);
3565 
3566 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3567 		qp->prev_wqe_size = awqe1->wqe_size;
3568 		break;
3569 
3570 	case IB_WR_LOCAL_INV:
3571 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3572 		iwqe->wqe_size = 1;
3573 
3574 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3575 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3576 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3577 		qp->prev_wqe_size = iwqe->wqe_size;
3578 		break;
3579 	case IB_WR_REG_MR:
3580 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3581 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3582 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3583 		fwqe1->wqe_size = 2;
3584 
3585 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3586 		if (rc) {
3587 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3588 			*bad_wr = wr;
3589 			break;
3590 		}
3591 
3592 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3593 		qp->prev_wqe_size = fwqe1->wqe_size;
3594 		break;
3595 	default:
3596 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3597 		rc = -EINVAL;
3598 		*bad_wr = wr;
3599 		break;
3600 	}
3601 
3602 	if (*bad_wr) {
3603 		u16 value;
3604 
3605 		/* Restore prod to its position before
3606 		 * this WR was processed
3607 		 */
3608 		value = le16_to_cpu(qp->sq.db_data.data.value);
3609 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3610 
3611 		/* Restore prev_wqe_size */
3612 		qp->prev_wqe_size = wqe->prev_wqe_size;
3613 		rc = -EINVAL;
3614 		DP_ERR(dev, "POST SEND FAILED\n");
3615 	}
3616 
3617 	return rc;
3618 }
3619 
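/* Post a chain of send WRs: each WR is built into the SQ chain by
 * __qedr_post_send(), the software producer and the doorbell data value are
 * advanced per WR, and a single doorbell write at the end publishes the
 * whole batch to hardware.
 */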
3620 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3621 		   const struct ib_send_wr **bad_wr)
3622 {
3623 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3624 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3625 	unsigned long flags;
3626 	int rc = 0;
3627 
3628 	*bad_wr = NULL;
3629 
3630 	if (qp->qp_type == IB_QPT_GSI)
3631 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3632 
3633 	spin_lock_irqsave(&qp->q_lock, flags);
3634 
3635 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3636 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3637 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3638 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3639 			spin_unlock_irqrestore(&qp->q_lock, flags);
3640 			*bad_wr = wr;
3641 			DP_DEBUG(dev, QEDR_MSG_CQ,
3642 				 "QP in wrong state! QP icid=0x%x state %d\n",
3643 				 qp->icid, qp->state);
3644 			return -EINVAL;
3645 		}
3646 	}
3647 
3648 	while (wr) {
3649 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3650 		if (rc)
3651 			break;
3652 
3653 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3654 
3655 		qedr_inc_sw_prod(&qp->sq);
3656 
3657 		qp->sq.db_data.data.value++;
3658 
3659 		wr = wr->next;
3660 	}
3661 
3662 	/* Trigger doorbell
3663 	 * If there was a failure in the first WR then it will be triggered in
3664 	 * vain. However, this is not harmful (as long as the producer value is
3665 	 * unchanged). For performance reasons we avoid checking for this
3666 	 * redundant doorbell.
3667 	 *
3668 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3669 	 * soon as we give the doorbell, we could get a completion
3670 	 * for this wr, therefore we need to make sure that the
3671 	 * memory is updated before giving the doorbell.
3672 	 * During qedr_poll_cq, rmb is called before accessing the
3673 	 * cqe. This covers for the smp_rmb as well.
3674 	 */
3675 	smp_wmb();
3676 	writel(qp->sq.db_data.raw, qp->sq.db);
3677 
3678 	spin_unlock_irqrestore(&qp->q_lock, flags);
3679 
3680 	return rc;
3681 }
3682 
3683 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3684 {
3685 	u32 used;
3686 
3687 	/* Calculate number of elements used based on producer
3688 	 * count and consumer count and subtract it from max
3689 	 * work request supported so that we get elements left.
3690 	 */
3691 	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3692 
3693 	return hw_srq->max_wr - used;
3694 }
3695 
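/* Post receive WRs to an SRQ: for every WR a header element plus one chain
 * element per SGE are produced on the SRQ PBL, and the SGE/WQE producer
 * indices are then published to the producers page shared with the FW, with
 * write barriers separating the WQE data from the producer updates.
 */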
3696 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3697 		       const struct ib_recv_wr **bad_wr)
3698 {
3699 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3700 	struct qedr_srq_hwq_info *hw_srq;
3701 	struct qedr_dev *dev = srq->dev;
3702 	struct qed_chain *pbl;
3703 	unsigned long flags;
3704 	int status = 0;
3705 	u32 num_sge;
3706 	u32 offset;
3707 
3708 	spin_lock_irqsave(&srq->lock, flags);
3709 
3710 	hw_srq = &srq->hw_srq;
3711 	pbl = &srq->hw_srq.pbl;
3712 	while (wr) {
3713 		struct rdma_srq_wqe_header *hdr;
3714 		int i;
3715 
3716 		if (!qedr_srq_elem_left(hw_srq) ||
3717 		    wr->num_sge > srq->hw_srq.max_sges) {
3718 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3719 			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3720 			       wr->num_sge, srq->hw_srq.max_sges);
3721 			status = -ENOMEM;
3722 			*bad_wr = wr;
3723 			break;
3724 		}
3725 
3726 		hdr = qed_chain_produce(pbl);
3727 		num_sge = wr->num_sge;
3728 		/* Set number of sge and work request id in header */
3729 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3730 
3731 		srq->hw_srq.wr_prod_cnt++;
3732 		hw_srq->wqe_prod++;
3733 		hw_srq->sge_prod++;
3734 
3735 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3736 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3737 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3738 
3739 		for (i = 0; i < wr->num_sge; i++) {
3740 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3741 
3742 			/* Set SGE length, lkey and address */
3743 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3744 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3745 
3746 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3747 				 "[%d]: len %d key %x addr %x:%x\n",
3748 				 i, srq_sge->length, srq_sge->l_key,
3749 				 srq_sge->addr.hi, srq_sge->addr.lo);
3750 			hw_srq->sge_prod++;
3751 		}
3752 
3753 		/* Flush WQE and SGE information before
3754 		 * updating producer.
3755 		 */
3756 		wmb();
3757 
3758 		/* SRQ producer is 8 bytes. Need to update SGE producer index
3759 		 * in first 4 bytes and need to update WQE producer in
3760 		 * next 4 bytes.
3761 		 */
3762 		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3763 		offset = offsetof(struct rdma_srq_producers, wqe_prod);
3764 		*((u32 *)((u8 *)srq->hw_srq.virt_prod_pair_addr + offset)) =
3765 			hw_srq->wqe_prod;
3766 
3767 		/* Flush producer after updating it. */
3768 		wmb();
3769 		wr = wr->next;
3770 	}
3771 
3772 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3773 		 qed_chain_get_elem_left(pbl));
3774 	spin_unlock_irqrestore(&srq->lock, flags);
3775 
3776 	return status;
3777 }
3778 
3779 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3780 		   const struct ib_recv_wr **bad_wr)
3781 {
3782 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3783 	struct qedr_dev *dev = qp->dev;
3784 	unsigned long flags;
3785 	int status = 0;
3786 
3787 	if (qp->qp_type == IB_QPT_GSI)
3788 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3789 
3790 	spin_lock_irqsave(&qp->q_lock, flags);
3791 
3792 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3793 		spin_unlock_irqrestore(&qp->q_lock, flags);
3794 		*bad_wr = wr;
3795 		return -EINVAL;
3796 	}
3797 
3798 	while (wr) {
3799 		int i;
3800 
3801 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3802 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3803 		    wr->num_sge > qp->rq.max_sges) {
3804 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3805 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3806 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3807 			       qp->rq.max_sges);
3808 			status = -ENOMEM;
3809 			*bad_wr = wr;
3810 			break;
3811 		}
3812 		for (i = 0; i < wr->num_sge; i++) {
3813 			u32 flags = 0;
3814 			struct rdma_rq_sge *rqe =
3815 			    qed_chain_produce(&qp->rq.pbl);
3816 
3817 			/* First one must include the number
3818 			 * of SGE in the list
3819 			 */
3820 			if (!i)
3821 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3822 					  wr->num_sge);
3823 
3824 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3825 				  wr->sg_list[i].lkey);
3826 
3827 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3828 				   wr->sg_list[i].length, flags);
3829 		}
3830 
3831 		/* Special case of no SGEs. FW requires between 1 and 4 SGEs;
3832 		 * in this case we post one SGE with length zero, because an
3833 		 * RDMA WRITE with immediate still consumes an RQ entry.
3834 		 */
3835 		if (!wr->num_sge) {
3836 			u32 flags = 0;
3837 			struct rdma_rq_sge *rqe =
3838 			    qed_chain_produce(&qp->rq.pbl);
3839 
3840 			/* First one must include the number
3841 			 * of SGE in the list
3842 			 */
3843 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3844 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3845 
3846 			RQ_SGE_SET(rqe, 0, 0, flags);
3847 			i = 1;
3848 		}
3849 
3850 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3851 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3852 
3853 		qedr_inc_sw_prod(&qp->rq);
3854 
3855 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3856 		 * soon as we give the doorbell, we could get a completion
3857 		 * for this wr, therefore we need to make sure that the
3858 		 * memory is updated before giving the doorbell.
3859 		 * During qedr_poll_cq, rmb is called before accessing the
3860 		 * cqe. This covers for the smp_rmb as well.
3861 		 */
3862 		smp_wmb();
3863 
3864 		qp->rq.db_data.data.value++;
3865 
3866 		writel(qp->rq.db_data.raw, qp->rq.db);
3867 
3868 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3869 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3870 		}
3871 
3872 		wr = wr->next;
3873 	}
3874 
3875 	spin_unlock_irqrestore(&qp->q_lock, flags);
3876 
3877 	return status;
3878 }
3879 
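/* A CQE is ready for processing when its toggle bit matches the CQ's current
 * toggle value; the toggle flips each time the CQE chain wraps, so stale
 * entries from the previous pass are not mistaken for new completions.
 */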
3880 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3881 {
3882 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3883 
3884 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3885 		cq->pbl_toggle;
3886 }
3887 
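/* Recover the owning QP from the completion: the QP pointer is programmed
 * into the qp_handle hi/lo pair at QP creation, so it can be reassembled
 * here with HILO_GEN().
 */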
3888 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3889 {
3890 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3891 	struct qedr_qp *qp;
3892 
3893 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3894 						   resp_cqe->qp_handle.lo,
3895 						   u64);
3896 	return qp;
3897 }
3898 
3899 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3900 {
3901 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3902 
3903 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3904 }
3905 
3906 /* Return latest CQE (needs processing) */
3907 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3908 {
3909 	return cq->latest_cqe;
3910 }
3911 
3912 /* In FMR we need to increase the FMR's completed counter, which is used by
3913  * the FMR algorithm to determine whether a PBL can be freed.
3914  * This must be done whether or not the work request was signaled. For that
3915  * purpose this function is called from the condition that checks whether a
3916  * WC should be skipped, so that an unsignaled FMR operation is not missed.
3918  */
3919 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3920 {
3921 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3922 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3923 }
3924 
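/* Drain SQ completions up to hw_cons: unsignaled WRs produce no work
 * completion and are simply consumed (unless 'force' is set, as on a flush),
 * while the rest get a WC filled with 'status'; for every WR the WQE's chain
 * elements are consumed and the software consumer index is advanced.
 */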
3925 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3926 		       struct qedr_cq *cq, int num_entries,
3927 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3928 		       int force)
3929 {
3930 	u16 cnt = 0;
3931 
3932 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3933 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3934 			qedr_chk_if_fmr(qp);
3935 			/* skip WC */
3936 			goto next_cqe;
3937 		}
3938 
3939 		/* fill WC */
3940 		wc->status = status;
3941 		wc->vendor_err = 0;
3942 		wc->wc_flags = 0;
3943 		wc->src_qp = qp->id;
3944 		wc->qp = &qp->ibqp;
3945 
3946 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3947 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3948 
3949 		switch (wc->opcode) {
3950 		case IB_WC_RDMA_WRITE:
3951 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3952 			break;
3953 		case IB_WC_COMP_SWAP:
3954 		case IB_WC_FETCH_ADD:
3955 			wc->byte_len = 8;
3956 			break;
3957 		case IB_WC_REG_MR:
3958 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3959 			break;
3960 		case IB_WC_RDMA_READ:
3961 		case IB_WC_SEND:
3962 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3963 			break;
3964 		default:
3965 			break;
3966 		}
3967 
3968 		num_entries--;
3969 		wc++;
3970 		cnt++;
3971 next_cqe:
3972 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3973 			qed_chain_consume(&qp->sq.pbl);
3974 		qedr_inc_sw_cons(&qp->sq);
3975 	}
3976 
3977 	return cnt;
3978 }
3979 
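/* Handle a requester CQE: on success or flush, complete all SQ WRs up to the
 * CQE's sq_cons; on any other error, move the QP to the error state, complete
 * the WRs preceding the failed one as successful, then report the failed WR
 * with a status translated from the FW error code.
 */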
3980 static int qedr_poll_cq_req(struct qedr_dev *dev,
3981 			    struct qedr_qp *qp, struct qedr_cq *cq,
3982 			    int num_entries, struct ib_wc *wc,
3983 			    struct rdma_cqe_requester *req)
3984 {
3985 	int cnt = 0;
3986 
3987 	switch (req->status) {
3988 	case RDMA_CQE_REQ_STS_OK:
3989 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3990 				  IB_WC_SUCCESS, 0);
3991 		break;
3992 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3993 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3994 			DP_DEBUG(dev, QEDR_MSG_CQ,
3995 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3996 				 cq->icid, qp->icid);
3997 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3998 				  IB_WC_WR_FLUSH_ERR, 1);
3999 		break;
4000 	default:
4001 		/* process all WQEs before the consumer */
4002 		qp->state = QED_ROCE_QP_STATE_ERR;
4003 		cnt = process_req(dev, qp, cq, num_entries, wc,
4004 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
4005 		wc += cnt;
4006 		/* if we have extra WC fill it with actual error info */
4007 		if (cnt < num_entries) {
4008 			enum ib_wc_status wc_status;
4009 
4010 			switch (req->status) {
4011 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4012 				DP_ERR(dev,
4013 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4014 				       cq->icid, qp->icid);
4015 				wc_status = IB_WC_BAD_RESP_ERR;
4016 				break;
4017 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4018 				DP_ERR(dev,
4019 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4020 				       cq->icid, qp->icid);
4021 				wc_status = IB_WC_LOC_LEN_ERR;
4022 				break;
4023 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4024 				DP_ERR(dev,
4025 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4026 				       cq->icid, qp->icid);
4027 				wc_status = IB_WC_LOC_QP_OP_ERR;
4028 				break;
4029 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4030 				DP_ERR(dev,
4031 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4032 				       cq->icid, qp->icid);
4033 				wc_status = IB_WC_LOC_PROT_ERR;
4034 				break;
4035 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4036 				DP_ERR(dev,
4037 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4038 				       cq->icid, qp->icid);
4039 				wc_status = IB_WC_MW_BIND_ERR;
4040 				break;
4041 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4042 				DP_ERR(dev,
4043 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4044 				       cq->icid, qp->icid);
4045 				wc_status = IB_WC_REM_INV_REQ_ERR;
4046 				break;
4047 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4048 				DP_ERR(dev,
4049 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4050 				       cq->icid, qp->icid);
4051 				wc_status = IB_WC_REM_ACCESS_ERR;
4052 				break;
4053 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4054 				DP_ERR(dev,
4055 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4056 				       cq->icid, qp->icid);
4057 				wc_status = IB_WC_REM_OP_ERR;
4058 				break;
4059 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4060 				DP_ERR(dev,
4061 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4062 				       cq->icid, qp->icid);
4063 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4064 				break;
4065 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4066 				DP_ERR(dev,
4067 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4068 				       cq->icid, qp->icid);
4069 				wc_status = IB_WC_RETRY_EXC_ERR;
4070 				break;
4071 			default:
4072 				DP_ERR(dev,
4073 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4074 				       cq->icid, qp->icid);
4075 				wc_status = IB_WC_GENERAL_ERR;
4076 			}
4077 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
4078 					   wc_status, 1);
4079 		}
4080 	}
4081 
4082 	return cnt;
4083 }
4084 
4085 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4086 {
4087 	switch (status) {
4088 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4089 		return IB_WC_LOC_ACCESS_ERR;
4090 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4091 		return IB_WC_LOC_LEN_ERR;
4092 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4093 		return IB_WC_LOC_QP_OP_ERR;
4094 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4095 		return IB_WC_LOC_PROT_ERR;
4096 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4097 		return IB_WC_MW_BIND_ERR;
4098 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4099 		return IB_WC_REM_INV_RD_REQ_ERR;
4100 	case RDMA_CQE_RESP_STS_OK:
4101 		return IB_WC_SUCCESS;
4102 	default:
4103 		return IB_WC_GENERAL_ERR;
4104 	}
4105 }
4106 
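/* Fill a successful responder WC from the CQE flags. Immediate data and
 * invalidate are mutually exclusive, and the RDMA flag is only valid together
 * with immediate data; any other combination is rejected as an invalid CQE.
 */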
4107 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4108 					  struct ib_wc *wc)
4109 {
4110 	wc->status = IB_WC_SUCCESS;
4111 	wc->byte_len = le32_to_cpu(resp->length);
4112 
4113 	if (resp->flags & QEDR_RESP_IMM) {
4114 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4115 		wc->wc_flags |= IB_WC_WITH_IMM;
4116 
4117 		if (resp->flags & QEDR_RESP_RDMA)
4118 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4119 
4120 		if (resp->flags & QEDR_RESP_INV)
4121 			return -EINVAL;
4122 
4123 	} else if (resp->flags & QEDR_RESP_INV) {
4124 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4125 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4126 
4127 		if (resp->flags & QEDR_RESP_RDMA)
4128 			return -EINVAL;
4129 
4130 	} else if (resp->flags & QEDR_RESP_RDMA) {
4131 		return -EINVAL;
4132 	}
4133 
4134 	return 0;
4135 }
4136 
4137 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4138 			       struct qedr_cq *cq, struct ib_wc *wc,
4139 			       struct rdma_cqe_responder *resp, u64 wr_id)
4140 {
4141 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4142 	wc->opcode = IB_WC_RECV;
4143 	wc->wc_flags = 0;
4144 
4145 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4146 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
4147 			DP_ERR(dev,
4148 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4149 			       cq, cq->icid, resp->flags);
4150 
4151 	} else {
4152 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4153 		if (wc->status == IB_WC_GENERAL_ERR)
4154 			DP_ERR(dev,
4155 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4156 			       cq, cq->icid, resp->status);
4157 	}
4158 
4159 	/* Fill the rest of the WC */
4160 	wc->vendor_err = 0;
4161 	wc->src_qp = qp->id;
4162 	wc->qp = &qp->ibqp;
4163 	wc->wr_id = wr_id;
4164 }
4165 
4166 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4167 				struct qedr_cq *cq, struct ib_wc *wc,
4168 				struct rdma_cqe_responder *resp)
4169 {
4170 	struct qedr_srq *srq = qp->srq;
4171 	u64 wr_id;
4172 
4173 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4174 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
4175 
4176 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4177 		wc->status = IB_WC_WR_FLUSH_ERR;
4178 		wc->vendor_err = 0;
4179 		wc->wr_id = wr_id;
4180 		wc->byte_len = 0;
4181 		wc->src_qp = qp->id;
4182 		wc->qp = &qp->ibqp;
4184 	} else {
4185 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4186 	}
4187 	srq->hw_srq.wr_cons_cnt++;
4188 
4189 	return 1;
4190 }

4191 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4192 			    struct qedr_cq *cq, struct ib_wc *wc,
4193 			    struct rdma_cqe_responder *resp)
4194 {
4195 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4196 
4197 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
4198 
4199 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4200 		qed_chain_consume(&qp->rq.pbl);
4201 	qedr_inc_sw_cons(&qp->rq);
4202 
4203 	return 1;
4204 }
4205 
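/* Flush the RQ: generate IB_WC_WR_FLUSH_ERR completions for every posted
 * receive WR up to hw_cons, consuming the corresponding chain elements and
 * advancing the software consumer as in the normal receive path.
 */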
4206 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4207 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
4208 {
4209 	u16 cnt = 0;
4210 
4211 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
4212 		/* fill WC */
4213 		wc->status = IB_WC_WR_FLUSH_ERR;
4214 		wc->vendor_err = 0;
4215 		wc->wc_flags = 0;
4216 		wc->src_qp = qp->id;
4217 		wc->byte_len = 0;
4218 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4219 		wc->qp = &qp->ibqp;
4220 		num_entries--;
4221 		wc++;
4222 		cnt++;
4223 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4224 			qed_chain_consume(&qp->rq.pbl);
4225 		qedr_inc_sw_cons(&qp->rq);
4226 	}
4227 
4228 	return cnt;
4229 }
4230 
4231 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4232 				 struct rdma_cqe_responder *resp, int *update)
4233 {
4234 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
4235 		consume_cqe(cq);
4236 		*update |= 1;
4237 	}
4238 }
4239 
4240 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4241 				 struct qedr_cq *cq, int num_entries,
4242 				 struct ib_wc *wc,
4243 				 struct rdma_cqe_responder *resp)
4244 {
4245 	int cnt;
4246 
4247 	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4248 	consume_cqe(cq);
4249 
4250 	return cnt;
4251 }
4252 
4253 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4254 			     struct qedr_cq *cq, int num_entries,
4255 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4256 			     int *update)
4257 {
4258 	int cnt;
4259 
4260 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4261 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4262 					 resp->rq_cons_or_srq_id);
4263 		try_consume_resp_cqe(cq, qp, resp, update);
4264 	} else {
4265 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4266 		consume_cqe(cq);
4267 		*update |= 1;
4268 	}
4269 
4270 	return cnt;
4271 }
4272 
4273 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4274 				struct rdma_cqe_requester *req, int *update)
4275 {
4276 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4277 		consume_cqe(cq);
4278 		*update |= 1;
4279 	}
4280 }
4281 
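/* Poll the CQ: walk CQEs while their toggle bit marks them valid, dispatch by
 * CQE type (requester, responder RQ or responder SRQ), then advance cq_cons
 * by the number of chain elements consumed and, if anything was consumed,
 * ring the CQ doorbell with the last consumed index so the FW knows how far
 * the driver has read.
 */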
4282 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4283 {
4284 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4285 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4286 	union rdma_cqe *cqe;
4287 	u32 old_cons, new_cons;
4288 	unsigned long flags;
4289 	int update = 0;
4290 	int done = 0;
4291 
4292 	if (cq->destroyed) {
4293 		DP_ERR(dev,
4294 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4295 		       cq, cq->icid);
4296 		return 0;
4297 	}
4298 
4299 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4300 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4301 
4302 	spin_lock_irqsave(&cq->cq_lock, flags);
4303 	cqe = cq->latest_cqe;
4304 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4305 	while (num_entries && is_valid_cqe(cq, cqe)) {
4306 		struct qedr_qp *qp;
4307 		int cnt = 0;
4308 
4309 		/* prevent speculative reads of any field of CQE */
4310 		rmb();
4311 
4312 		qp = cqe_get_qp(cqe);
4313 		if (!qp) {
4314 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4315 			break;
4316 		}
4317 
4318 		wc->qp = &qp->ibqp;
4319 
4320 		switch (cqe_get_type(cqe)) {
4321 		case RDMA_CQE_TYPE_REQUESTER:
4322 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4323 					       &cqe->req);
4324 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4325 			break;
4326 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4327 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4328 						&cqe->resp, &update);
4329 			break;
4330 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4331 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4332 						    wc, &cqe->resp);
4333 			update = 1;
4334 			break;
4335 		case RDMA_CQE_TYPE_INVALID:
4336 		default:
4337 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4338 			       cqe_get_type(cqe));
4339 		}
4340 		num_entries -= cnt;
4341 		wc += cnt;
4342 		done += cnt;
4343 
4344 		cqe = get_cqe(cq);
4345 	}
4346 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4347 
4348 	cq->cq_cons += new_cons - old_cons;
4349 
4350 	if (update)
4351 		/* doorbell notifies about the latest VALID entry,
4352 		 * but the chain already points to the next INVALID one
4353 		 */
4354 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4355 
4356 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4357 	return done;
4358 }
4359 
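/* qedr has no device-specific MAD processing; report success without
 * consuming the MAD, presumably leaving any handling to the IB MAD core.
 */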
4360 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4361 		     u8 port_num, const struct ib_wc *in_wc,
4362 		     const struct ib_grh *in_grh, const struct ib_mad *in,
4363 		     struct ib_mad *out_mad, size_t *out_mad_size,
4364 		     u16 *out_mad_pkey_index)
4365 {
4366 	return IB_MAD_RESULT_SUCCESS;
4367 }
4368