1 /*******************************************************************
2  * This file is part of the Emulex RoCE Device Driver for          *
3  * RoCE (RDMA over Converged Ethernet) adapters.                   *
4  * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
5  * EMULEX and SLI are trademarks of Emulex.                        *
6  * www.emulex.com                                                  *
7  *                                                                 *
8  * This program is free software; you can redistribute it and/or   *
9  * modify it under the terms of version 2 of the GNU General       *
10  * Public License as published by the Free Software Foundation.    *
11  * This program is distributed in the hope that it will be useful. *
12  * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
13  * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
14  * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
15  * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16  * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
17  * more details, a copy of which can be found in the file COPYING  *
18  * included with this package.                                     *
19  *
20  * Contact Information:
21  * linux-drivers@emulex.com
22  *
23  * Emulex
24  * 3333 Susan Street
25  * Costa Mesa, CA 92626
26  *******************************************************************/
27 
28 #include <linux/dma-mapping.h>
29 #include <rdma/ib_verbs.h>
30 #include <rdma/ib_user_verbs.h>
31 #include <rdma/iw_cm.h>
32 #include <rdma/ib_umem.h>
33 #include <rdma/ib_addr.h>
34 
35 #include "ocrdma.h"
36 #include "ocrdma_hw.h"
37 #include "ocrdma_verbs.h"
38 #include "ocrdma_abi.h"
39 
40 int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
41 {
42 	if (index > 1)
43 		return -EINVAL;
44 
45 	*pkey = 0xffff;
46 	return 0;
47 }
48 
49 int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
50 		     int index, union ib_gid *sgid)
51 {
52 	struct ocrdma_dev *dev;
53 
54 	dev = get_ocrdma_dev(ibdev);
55 	memset(sgid, 0, sizeof(*sgid));
56 	if (index >= OCRDMA_MAX_SGID)
57 		return -EINVAL;
58 
59 	memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));
60 
61 	return 0;
62 }
63 
64 int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
65 {
66 	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
67 
68 	memset(attr, 0, sizeof *attr);
69 	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
70 	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
71 	ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
72 	attr->max_mr_size = ~0ull;
73 	attr->page_size_cap = 0xffff000;
74 	attr->vendor_id = dev->nic_info.pdev->vendor;
75 	attr->vendor_part_id = dev->nic_info.pdev->device;
76 	attr->hw_ver = 0;
77 	attr->max_qp = dev->attr.max_qp;
78 	attr->max_ah = dev->attr.max_qp;
79 	attr->max_qp_wr = dev->attr.max_wqe;
80 
81 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
82 					IB_DEVICE_RC_RNR_NAK_GEN |
83 					IB_DEVICE_SHUTDOWN_PORT |
84 					IB_DEVICE_SYS_IMAGE_GUID |
85 					IB_DEVICE_LOCAL_DMA_LKEY;
86 	attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
87 	attr->max_sge_rd = 0;
88 	attr->max_cq = dev->attr.max_cq;
89 	attr->max_cqe = dev->attr.max_cqe;
90 	attr->max_mr = dev->attr.max_mr;
91 	attr->max_mw = 0;
92 	attr->max_pd = dev->attr.max_pd;
93 	attr->atomic_cap = 0;
94 	attr->max_fmr = 0;
95 	attr->max_map_per_fmr = 0;
96 	attr->max_qp_rd_atom =
97 	    min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
98 	attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
99 	attr->max_srq = (dev->attr.max_qp - 1);
100 	attr->max_srq_sge = dev->attr.max_srq_sge;
101 	attr->max_srq_wr = dev->attr.max_rqe;
102 	attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
103 	attr->max_fast_reg_page_list_len = 0;
104 	attr->max_pkeys = 1;
105 	return 0;
106 }
107 
108 int ocrdma_query_port(struct ib_device *ibdev,
109 		      u8 port, struct ib_port_attr *props)
110 {
111 	enum ib_port_state port_state;
112 	struct ocrdma_dev *dev;
113 	struct net_device *netdev;
114 
115 	dev = get_ocrdma_dev(ibdev);
116 	if (port > 1) {
117 		pr_err("%s(%d) invalid_port=0x%x\n", __func__,
118 		       dev->id, port);
119 		return -EINVAL;
120 	}
121 	netdev = dev->nic_info.netdev;
122 	if (netif_running(netdev) && netif_oper_up(netdev)) {
123 		port_state = IB_PORT_ACTIVE;
124 		props->phys_state = 5;
125 	} else {
126 		port_state = IB_PORT_DOWN;
127 		props->phys_state = 3;
128 	}
129 	props->max_mtu = IB_MTU_4096;
130 	props->active_mtu = iboe_get_mtu(netdev->mtu);
131 	props->lid = 0;
132 	props->lmc = 0;
133 	props->sm_lid = 0;
134 	props->sm_sl = 0;
135 	props->state = port_state;
136 	props->port_cap_flags =
137 	    IB_PORT_CM_SUP |
138 	    IB_PORT_REINIT_SUP |
139 	    IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP;
140 	props->gid_tbl_len = OCRDMA_MAX_SGID;
141 	props->pkey_tbl_len = 1;
142 	props->bad_pkey_cntr = 0;
143 	props->qkey_viol_cntr = 0;
144 	props->active_width = IB_WIDTH_1X;
145 	props->active_speed = 4;
146 	props->max_msg_sz = 0x80000000;
147 	props->max_vl_num = 4;
148 	return 0;
149 }
150 
151 int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
152 		       struct ib_port_modify *props)
153 {
154 	struct ocrdma_dev *dev;
155 
156 	dev = get_ocrdma_dev(ibdev);
157 	if (port > 1) {
158 		pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
159 		return -EINVAL;
160 	}
161 	return 0;
162 }
163 
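/* remember a (phy_addr, len) region that this ucontext is allowed to
 * mmap(); ocrdma_mmap() later validates incoming requests against this
 * list, and ocrdma_del_mmap()/ocrdma_search_mmap() manage the same list.
 */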
164 static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
165 			   unsigned long len)
166 {
167 	struct ocrdma_mm *mm;
168 
169 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
170 	if (mm == NULL)
171 		return -ENOMEM;
172 	mm->key.phy_addr = phy_addr;
173 	mm->key.len = len;
174 	INIT_LIST_HEAD(&mm->entry);
175 
176 	mutex_lock(&uctx->mm_list_lock);
177 	list_add_tail(&mm->entry, &uctx->mm_head);
178 	mutex_unlock(&uctx->mm_list_lock);
179 	return 0;
180 }
181 
182 static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
183 			    unsigned long len)
184 {
185 	struct ocrdma_mm *mm, *tmp;
186 
187 	mutex_lock(&uctx->mm_list_lock);
188 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
189 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
190 			continue;
191 
192 		list_del(&mm->entry);
193 		kfree(mm);
194 		break;
195 	}
196 	mutex_unlock(&uctx->mm_list_lock);
197 }
198 
199 static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
200 			      unsigned long len)
201 {
202 	bool found = false;
203 	struct ocrdma_mm *mm;
204 
205 	mutex_lock(&uctx->mm_list_lock);
206 	list_for_each_entry(mm, &uctx->mm_head, entry) {
207 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
208 			continue;
209 
210 		found = true;
211 		break;
212 	}
213 	mutex_unlock(&uctx->mm_list_lock);
214 	return found;
215 }
216 
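/* allocate a per-process ucontext along with a DMA-coherent AH table
 * whose bus address is handed back to user space in the uresp so that
 * it can be mmap()ed.
 */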
217 struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
218 					  struct ib_udata *udata)
219 {
220 	int status;
221 	struct ocrdma_ucontext *ctx;
222 	struct ocrdma_alloc_ucontext_resp resp;
223 	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
224 	struct pci_dev *pdev = dev->nic_info.pdev;
225 	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
226 
227 	if (!udata)
228 		return ERR_PTR(-EFAULT);
229 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
230 	if (!ctx)
231 		return ERR_PTR(-ENOMEM);
232 	ctx->dev = dev;
233 	INIT_LIST_HEAD(&ctx->mm_head);
234 	mutex_init(&ctx->mm_list_lock);
235 
236 	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
237 					    &ctx->ah_tbl.pa, GFP_KERNEL);
238 	if (!ctx->ah_tbl.va) {
239 		kfree(ctx);
240 		return ERR_PTR(-ENOMEM);
241 	}
242 	memset(ctx->ah_tbl.va, 0, map_len);
243 	ctx->ah_tbl.len = map_len;
244 
245 	memset(&resp, 0, sizeof(resp));
246 	resp.ah_tbl_len = ctx->ah_tbl.len;
247 	resp.ah_tbl_page = ctx->ah_tbl.pa;
248 
249 	status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
250 	if (status)
251 		goto map_err;
252 	resp.dev_id = dev->id;
253 	resp.max_inline_data = dev->attr.max_inline_data;
254 	resp.wqe_size = dev->attr.wqe_size;
255 	resp.rqe_size = dev->attr.rqe_size;
256 	resp.dpp_wqe_size = dev->attr.wqe_size;
257 
258 	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
259 	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
260 	if (status)
261 		goto cpy_err;
262 	return &ctx->ibucontext;
263 
264 cpy_err:
265 	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
266 map_err:
267 	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
268 			  ctx->ah_tbl.pa);
269 	kfree(ctx);
270 	return ERR_PTR(status);
271 }
272 
273 int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
274 {
275 	struct ocrdma_mm *mm, *tmp;
276 	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
277 	struct pci_dev *pdev = uctx->dev->nic_info.pdev;
278 
279 	ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
280 	dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
281 			  uctx->ah_tbl.pa);
282 
283 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
284 		list_del(&mm->entry);
285 		kfree(mm);
286 	}
287 	kfree(uctx);
288 	return 0;
289 }
290 
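/* mmap() handler: the offset selects one of three regions previously
 * registered via ocrdma_add_mmap() - a doorbell page, the DPP area
 * (mapped write-combined) or ordinary queue memory.
 */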
291 int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
292 {
293 	struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
294 	struct ocrdma_dev *dev = ucontext->dev;
295 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
296 	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
297 	unsigned long len = (vma->vm_end - vma->vm_start);
298 	int status = 0;
299 	bool found;
300 
301 	if (vma->vm_start & (PAGE_SIZE - 1))
302 		return -EINVAL;
303 	found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
304 	if (!found)
305 		return -EINVAL;
306 
307 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
308 		dev->nic_info.db_total_size)) &&
309 		(len <=	dev->nic_info.db_page_size)) {
310 		/* doorbell mapping */
311 		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
312 					    len, vma->vm_page_prot);
313 	} else if (dev->nic_info.dpp_unmapped_len &&
314 		(vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
315 		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
316 			dev->nic_info.dpp_unmapped_len)) &&
317 		(len <= dev->nic_info.dpp_unmapped_len)) {
318 		/* dpp area mapping */
319 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
320 		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
321 					    len, vma->vm_page_prot);
322 	} else {
323 		/* queue memory mapping */
324 		status = remap_pfn_range(vma, vma->vm_start,
325 					 vma->vm_pgoff, len, vma->vm_page_prot);
326 	}
327 	return status;
328 }
329 
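/* report the PD's doorbell page (and DPP page, when enabled) to user
 * space and register both regions as mmap()able for this ucontext.
 */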
330 static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd,
331 				struct ib_ucontext *ib_ctx,
332 				struct ib_udata *udata)
333 {
334 	int status;
335 	u64 db_page_addr;
336 	u64 dpp_page_addr = 0;
337 	u32 db_page_size;
338 	struct ocrdma_alloc_pd_uresp rsp;
339 	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
340 
341 	memset(&rsp, 0, sizeof(rsp));
342 	rsp.id = pd->id;
343 	rsp.dpp_enabled = pd->dpp_enabled;
344 	db_page_addr = pd->dev->nic_info.unmapped_db +
345 			(pd->id * pd->dev->nic_info.db_page_size);
346 	db_page_size = pd->dev->nic_info.db_page_size;
347 
348 	status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
349 	if (status)
350 		return status;
351 
352 	if (pd->dpp_enabled) {
353 		dpp_page_addr = pd->dev->nic_info.dpp_unmapped_addr +
354 				(pd->id * OCRDMA_DPP_PAGE_SIZE);
355 		status = ocrdma_add_mmap(uctx, dpp_page_addr,
356 				 OCRDMA_DPP_PAGE_SIZE);
357 		if (status)
358 			goto dpp_map_err;
359 		rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
360 		rsp.dpp_page_addr_lo = dpp_page_addr;
361 	}
362 
363 	status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
364 	if (status)
365 		goto ucopy_err;
366 
367 	pd->uctx = uctx;
368 	return 0;
369 
370 ucopy_err:
371 	if (pd->dpp_enabled)
372 		ocrdma_del_mmap(pd->uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE);
373 dpp_map_err:
374 	ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
375 	return status;
376 }
377 
378 struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
379 			      struct ib_ucontext *context,
380 			      struct ib_udata *udata)
381 {
382 	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
383 	struct ocrdma_pd *pd;
384 	int status;
385 
386 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
387 	if (!pd)
388 		return ERR_PTR(-ENOMEM);
389 	pd->dev = dev;
390 	if (udata && context) {
391 		pd->dpp_enabled = (dev->nic_info.dev_family ==
392 					OCRDMA_GEN2_FAMILY) ? true : false;
393 		pd->num_dpp_qp =
394 			pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
395 	}
396 	status = ocrdma_mbx_alloc_pd(dev, pd);
397 	if (status) {
398 		kfree(pd);
399 		return ERR_PTR(status);
400 	}
401 
402 	if (udata && context) {
403 		status = ocrdma_copy_pd_uresp(pd, context, udata);
404 		if (status)
405 			goto err;
406 	}
407 	return &pd->ibpd;
408 
409 err:
410 	ocrdma_dealloc_pd(&pd->ibpd);
411 	return ERR_PTR(status);
412 }
413 
414 int ocrdma_dealloc_pd(struct ib_pd *ibpd)
415 {
416 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
417 	struct ocrdma_dev *dev = pd->dev;
418 	int status;
419 	u64 usr_db;
420 
421 	status = ocrdma_mbx_dealloc_pd(dev, pd);
422 	if (pd->uctx) {
423 		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
424 		    (pd->id * OCRDMA_DPP_PAGE_SIZE);
425 		if (pd->dpp_enabled)
426 			ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE);
427 		usr_db = dev->nic_info.unmapped_db +
428 		    (pd->id * dev->nic_info.db_page_size);
429 		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
430 	}
431 	kfree(pd);
432 	return status;
433 }
434 
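/* allocate an MR lkey via the mailbox command; used by
 * ocrdma_get_dma_mr() below with address checking disabled.
 */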
435 static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
436 					   int acc, u32 num_pbls,
437 					   u32 addr_check)
438 {
439 	int status;
440 	struct ocrdma_mr *mr;
441 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
442 	struct ocrdma_dev *dev = pd->dev;
443 
444 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
445 		pr_err("%s(%d) leaving err, invalid access rights\n",
446 		       __func__, dev->id);
447 		return ERR_PTR(-EINVAL);
448 	}
449 
450 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
451 	if (!mr)
452 		return ERR_PTR(-ENOMEM);
453 	mr->hwmr.dev = dev;
454 	mr->hwmr.fr_mr = 0;
455 	mr->hwmr.local_rd = 1;
456 	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
457 	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
458 	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
459 	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
460 	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
461 	mr->hwmr.num_pbls = num_pbls;
462 
463 	status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pd->id, addr_check);
464 	if (status) {
465 		kfree(mr);
466 		return ERR_PTR(-ENOMEM);
467 	}
468 	mr->pd = pd;
469 	mr->ibmr.lkey = mr->hwmr.lkey;
470 	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
471 		mr->ibmr.rkey = mr->hwmr.lkey;
472 	return mr;
473 }
474 
475 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
476 {
477 	struct ocrdma_mr *mr;
478 
479 	mr = ocrdma_alloc_lkey(ibpd, acc, 0, OCRDMA_ADDR_CHECK_DISABLE);
480 	if (IS_ERR(mr))
481 		return ERR_CAST(mr);
482 
483 	return &mr->ibmr;
484 }
485 
486 static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
487 				   struct ocrdma_hw_mr *mr)
488 {
489 	struct pci_dev *pdev = dev->nic_info.pdev;
490 	int i = 0;
491 
492 	if (mr->pbl_table) {
493 		for (i = 0; i < mr->num_pbls; i++) {
494 			if (!mr->pbl_table[i].va)
495 				continue;
496 			dma_free_coherent(&pdev->dev, mr->pbl_size,
497 					  mr->pbl_table[i].va,
498 					  mr->pbl_table[i].pa);
499 		}
500 		kfree(mr->pbl_table);
501 		mr->pbl_table = NULL;
502 	}
503 }
504 
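/* pick the smallest PBL size (a power-of-two multiple of
 * OCRDMA_MIN_HPAGE_SIZE) for which the number of PBLs needed to hold
 * num_pbes entries stays below the device's max_num_mr_pbl limit.
 */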
505 static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes)
506 {
507 	u32 num_pbls = 0;
508 	u32 idx = 0;
509 	int status = 0;
510 	u32 pbl_size;
511 
512 	do {
513 		pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
514 		if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
515 			status = -EFAULT;
516 			break;
517 		}
518 		num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
519 		num_pbls = num_pbls / (pbl_size / sizeof(u64));
520 		idx++;
521 	} while (num_pbls >= mr->hwmr.dev->attr.max_num_mr_pbl);
522 
523 	mr->hwmr.num_pbes = num_pbes;
524 	mr->hwmr.num_pbls = num_pbls;
525 	mr->hwmr.pbl_size = pbl_size;
526 	return status;
527 }
528 
529 static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
530 {
531 	int status = 0;
532 	int i;
533 	u32 dma_len = mr->pbl_size;
534 	struct pci_dev *pdev = dev->nic_info.pdev;
535 	void *va;
536 	dma_addr_t pa;
537 
538 	mr->pbl_table = kzalloc(sizeof(struct ocrdma_pbl) *
539 				mr->num_pbls, GFP_KERNEL);
540 
541 	if (!mr->pbl_table)
542 		return -ENOMEM;
543 
544 	for (i = 0; i < mr->num_pbls; i++) {
545 		va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
546 		if (!va) {
547 			ocrdma_free_mr_pbl_tbl(dev, mr);
548 			status = -ENOMEM;
549 			break;
550 		}
551 		memset(va, 0, dma_len);
552 		mr->pbl_table[i].va = va;
553 		mr->pbl_table[i].pa = pa;
554 	}
555 	return status;
556 }
557 
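/* walk the umem chunks and write the DMA address of every page into the
 * PBL pages as 64-bit PBEs, moving on to the next PBL whenever the
 * current one fills up.
 */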
558 static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
559 			    u32 num_pbes)
560 {
561 	struct ocrdma_pbe *pbe;
562 	struct ib_umem_chunk *chunk;
563 	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
564 	struct ib_umem *umem = mr->umem;
565 	int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
566 
567 	if (!mr->hwmr.num_pbes)
568 		return;
569 
570 	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
571 	pbe_cnt = 0;
572 
573 	shift = ilog2(umem->page_size);
574 
575 	list_for_each_entry(chunk, &umem->chunk_list, list) {
576 		/* get all the dma regions from the chunk. */
577 		for (i = 0; i < chunk->nmap; i++) {
578 			pages = sg_dma_len(&chunk->page_list[i]) >> shift;
579 			for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
580 				/* store the page address in pbe */
581 				pbe->pa_lo =
582 				    cpu_to_le32(sg_dma_address
583 						(&chunk->page_list[i]) +
584 						(umem->page_size * pg_cnt));
585 				pbe->pa_hi =
586 				    cpu_to_le32(upper_32_bits
587 						((sg_dma_address
588 						  (&chunk->page_list[i]) +
589 						  umem->page_size * pg_cnt)));
590 				pbe_cnt += 1;
591 				total_num_pbes += 1;
592 				pbe++;
593 
594 				/* if done building pbes, issue the mbx cmd. */
595 				if (total_num_pbes == num_pbes)
596 					return;
597 
598 				/* if the given pbl is full of pbes,
599 				 * move to the next pbl.
600 				 */
601 				if (pbe_cnt ==
602 					(mr->hwmr.pbl_size / sizeof(u64))) {
603 					pbl_tbl++;
604 					pbe = (struct ocrdma_pbe *)pbl_tbl->va;
605 					pbe_cnt = 0;
606 				}
607 			}
608 		}
609 	}
610 }
611 
612 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
613 				 u64 usr_addr, int acc, struct ib_udata *udata)
614 {
615 	int status = -ENOMEM;
616 	struct ocrdma_dev *dev;
617 	struct ocrdma_mr *mr;
618 	struct ocrdma_pd *pd;
619 	u32 num_pbes;
620 
621 	pd = get_ocrdma_pd(ibpd);
622 	dev = pd->dev;
623 
624 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
625 		return ERR_PTR(-EINVAL);
626 
627 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
628 	if (!mr)
629 		return ERR_PTR(status);
630 	mr->hwmr.dev = dev;
631 	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
632 	if (IS_ERR(mr->umem)) {
633 		status = -EFAULT;
634 		goto umem_err;
635 	}
636 	num_pbes = ib_umem_page_count(mr->umem);
637 	status = ocrdma_get_pbl_info(mr, num_pbes);
638 	if (status)
639 		goto umem_err;
640 
641 	mr->hwmr.pbe_size = mr->umem->page_size;
642 	mr->hwmr.fbo = mr->umem->offset;
643 	mr->hwmr.va = usr_addr;
644 	mr->hwmr.len = len;
645 	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
646 	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
647 	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
648 	mr->hwmr.local_rd = 1;
649 	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
650 	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
651 	if (status)
652 		goto umem_err;
653 	build_user_pbes(dev, mr, num_pbes);
654 	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
655 	if (status)
656 		goto mbx_err;
657 	mr->pd = pd;
658 	mr->ibmr.lkey = mr->hwmr.lkey;
659 	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
660 		mr->ibmr.rkey = mr->hwmr.lkey;
661 
662 	return &mr->ibmr;
663 
664 mbx_err:
665 	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
666 umem_err:
667 	kfree(mr);
668 	return ERR_PTR(status);
669 }
670 
671 int ocrdma_dereg_mr(struct ib_mr *ib_mr)
672 {
673 	struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
674 	struct ocrdma_dev *dev = mr->hwmr.dev;
675 	int status;
676 
677 	status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
678 
679 	if (mr->hwmr.fr_mr == 0)
680 		ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
681 
682 	/* it could be user registered memory. */
683 	if (mr->umem)
684 		ib_umem_release(mr->umem);
685 	kfree(mr);
686 	return status;
687 }
688 
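/* pass the CQ queue page and doorbell page details to user space and
 * register both regions for mmap() on this ucontext.
 */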
689 static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
690 				struct ib_ucontext *ib_ctx)
691 {
692 	int status;
693 	struct ocrdma_ucontext *uctx;
694 	struct ocrdma_create_cq_uresp uresp;
695 
696 	memset(&uresp, 0, sizeof(uresp));
697 	uresp.cq_id = cq->id;
698 	uresp.page_size = cq->len;
699 	uresp.num_pages = 1;
700 	uresp.max_hw_cqe = cq->max_hw_cqe;
701 	uresp.page_addr[0] = cq->pa;
702 	uresp.db_page_addr = cq->dev->nic_info.unmapped_db;
703 	uresp.db_page_size = cq->dev->nic_info.db_page_size;
704 	uresp.phase_change = cq->phase_change ? 1 : 0;
705 	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
706 	if (status) {
707 		pr_err("%s(%d) copy error cqid=0x%x.\n",
708 		       __func__, cq->dev->id, cq->id);
709 		goto err;
710 	}
711 	uctx = get_ocrdma_ucontext(ib_ctx);
712 	status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
713 	if (status)
714 		goto err;
715 	status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
716 	if (status) {
717 		ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
718 		goto err;
719 	}
720 	cq->ucontext = uctx;
721 err:
722 	return status;
723 }
724 
725 struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
726 			       struct ib_ucontext *ib_ctx,
727 			       struct ib_udata *udata)
728 {
729 	struct ocrdma_cq *cq;
730 	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
731 	int status;
732 	struct ocrdma_create_cq_ureq ureq;
733 
734 	if (udata) {
735 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
736 			return ERR_PTR(-EFAULT);
737 	} else
738 		ureq.dpp_cq = 0;
739 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
740 	if (!cq)
741 		return ERR_PTR(-ENOMEM);
742 
743 	spin_lock_init(&cq->cq_lock);
744 	spin_lock_init(&cq->comp_handler_lock);
745 	INIT_LIST_HEAD(&cq->sq_head);
746 	INIT_LIST_HEAD(&cq->rq_head);
747 	cq->dev = dev;
748 
749 	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq);
750 	if (status) {
751 		kfree(cq);
752 		return ERR_PTR(status);
753 	}
754 	if (ib_ctx) {
755 		status = ocrdma_copy_cq_uresp(cq, udata, ib_ctx);
756 		if (status)
757 			goto ctx_err;
758 	}
759 	cq->phase = OCRDMA_CQE_VALID;
760 	cq->arm_needed = true;
761 	dev->cq_tbl[cq->id] = cq;
762 
763 	return &cq->ibcq;
764 
765 ctx_err:
766 	ocrdma_mbx_destroy_cq(dev, cq);
767 	kfree(cq);
768 	return ERR_PTR(status);
769 }
770 
771 int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
772 		     struct ib_udata *udata)
773 {
774 	int status = 0;
775 	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
776 
777 	if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
778 		status = -EINVAL;
779 		return status;
780 	}
781 	ibcq->cqe = new_cnt;
782 	return status;
783 }
784 
785 int ocrdma_destroy_cq(struct ib_cq *ibcq)
786 {
787 	int status;
788 	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
789 	struct ocrdma_dev *dev = cq->dev;
790 
791 	status = ocrdma_mbx_destroy_cq(dev, cq);
792 
793 	if (cq->ucontext) {
794 		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len);
795 		ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db,
796 				dev->nic_info.db_page_size);
797 	}
798 	dev->cq_tbl[cq->id] = NULL;
799 
800 	kfree(cq);
801 	return status;
802 }
803 
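/* qp_tbl maps a QP number back to its ocrdma_qp so that the rest of the
 * driver can look the QP up by id.
 */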
804 static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
805 {
806 	int status = -EINVAL;
807 
808 	if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
809 		dev->qp_tbl[qp->id] = qp;
810 		status = 0;
811 	}
812 	return status;
813 }
814 
815 static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
816 {
817 	dev->qp_tbl[qp->id] = NULL;
818 }
819 
820 static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
821 				  struct ib_qp_init_attr *attrs)
822 {
823 	if (attrs->qp_type != IB_QPT_GSI &&
824 	    attrs->qp_type != IB_QPT_RC &&
825 	    attrs->qp_type != IB_QPT_UD) {
826 		pr_err("%s(%d) unsupported qp type=0x%x requested\n",
827 		       __func__, dev->id, attrs->qp_type);
828 		return -EINVAL;
829 	}
830 	if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
831 		pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
832 		       __func__, dev->id, attrs->cap.max_send_wr);
833 		pr_err("%s(%d) supported send_wr=0x%x\n",
834 		       __func__, dev->id, dev->attr.max_wqe);
835 		return -EINVAL;
836 	}
837 	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
838 		pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
839 		       __func__, dev->id, attrs->cap.max_recv_wr);
840 		pr_err("%s(%d) supported recv_wr=0x%x\n",
841 		       __func__, dev->id, dev->attr.max_rqe);
842 		return -EINVAL;
843 	}
844 	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
845 		pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
846 		       __func__, dev->id, attrs->cap.max_inline_data);
847 		pr_err("%s(%d) supported inline data size=0x%x\n",
848 		       __func__, dev->id, dev->attr.max_inline_data);
849 		return -EINVAL;
850 	}
851 	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
852 		pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
853 		       __func__, dev->id, attrs->cap.max_send_sge);
854 		pr_err("%s(%d) supported send_sge=0x%x\n",
855 		       __func__, dev->id, dev->attr.max_send_sge);
856 		return -EINVAL;
857 	}
858 	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
859 		pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
860 		       __func__, dev->id, attrs->cap.max_recv_sge);
861 		pr_err("%s(%d) supported recv_sge=0x%x\n",
862 		       __func__, dev->id, dev->attr.max_recv_sge);
863 		return -EINVAL;
864 	}
865 	/* unprivileged user space cannot create special QP */
866 	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
867 		pr_err
868 		    ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
869 		     __func__, dev->id, attrs->qp_type);
870 		return -EINVAL;
871 	}
872 	/* allow creating only one GSI type of QP */
873 	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
874 		pr_err("%s(%d) GSI special QPs already created.\n",
875 		       __func__, dev->id);
876 		return -EINVAL;
877 	}
878 	/* verify consumer QPs are not trying to use GSI QP's CQ */
879 	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
880 		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
881 		    (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
882 		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
883 		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
884 			pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
885 			       __func__, dev->id);
886 			return -EINVAL;
887 		}
888 	}
889 	return 0;
890 }
891 
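/* describe the newly created QP (queue pages, doorbell page and offsets,
 * DPP credits) to user space and register the queue pages for mmap().
 */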
892 static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
893 				struct ib_udata *udata, int dpp_offset,
894 				int dpp_credit_lmt, int srq)
895 {
896 	int status = 0;
897 	u64 usr_db;
898 	struct ocrdma_create_qp_uresp uresp;
899 	struct ocrdma_dev *dev = qp->dev;
900 	struct ocrdma_pd *pd = qp->pd;
901 
902 	memset(&uresp, 0, sizeof(uresp));
903 	usr_db = dev->nic_info.unmapped_db +
904 			(pd->id * dev->nic_info.db_page_size);
905 	uresp.qp_id = qp->id;
906 	uresp.sq_dbid = qp->sq.dbid;
907 	uresp.num_sq_pages = 1;
908 	uresp.sq_page_size = qp->sq.len;
909 	uresp.sq_page_addr[0] = qp->sq.pa;
910 	uresp.num_wqe_allocated = qp->sq.max_cnt;
911 	if (!srq) {
912 		uresp.rq_dbid = qp->rq.dbid;
913 		uresp.num_rq_pages = 1;
914 		uresp.rq_page_size = qp->rq.len;
915 		uresp.rq_page_addr[0] = qp->rq.pa;
916 		uresp.num_rqe_allocated = qp->rq.max_cnt;
917 	}
918 	uresp.db_page_addr = usr_db;
919 	uresp.db_page_size = dev->nic_info.db_page_size;
920 	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
921 		uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
922 		uresp.db_rq_offset = ((qp->id & 0xFFFF) < 128) ?
923 			OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET;
924 		uresp.db_shift = (qp->id < 128) ? 24 : 16;
925 	} else {
926 		uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
927 		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
928 		uresp.db_shift = 16;
929 	}
930 
931 	if (qp->dpp_enabled) {
932 		uresp.dpp_credit = dpp_credit_lmt;
933 		uresp.dpp_offset = dpp_offset;
934 	}
935 	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
936 	if (status) {
937 		pr_err("%s(%d) user copy error.\n", __func__, dev->id);
938 		goto err;
939 	}
940 	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
941 				 uresp.sq_page_size);
942 	if (status)
943 		goto err;
944 
945 	if (!srq) {
946 		status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
947 					 uresp.rq_page_size);
948 		if (status)
949 			goto rq_map_err;
950 	}
951 	return status;
952 rq_map_err:
953 	ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
954 err:
955 	return status;
956 }
957 
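/* compute the kernel-mapped SQ/RQ doorbell addresses within this PD's
 * doorbell page; the register offsets differ between GEN2 and older
 * device families.
 */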
958 static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
959 			     struct ocrdma_pd *pd)
960 {
961 	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
962 		qp->sq_db = dev->nic_info.db +
963 			(pd->id * dev->nic_info.db_page_size) +
964 			OCRDMA_DB_GEN2_SQ_OFFSET;
965 		qp->rq_db = dev->nic_info.db +
966 			(pd->id * dev->nic_info.db_page_size) +
967 			((qp->id < 128) ?
968 			OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET);
969 	} else {
970 		qp->sq_db = dev->nic_info.db +
971 			(pd->id * dev->nic_info.db_page_size) +
972 			OCRDMA_DB_SQ_OFFSET;
973 		qp->rq_db = dev->nic_info.db +
974 			(pd->id * dev->nic_info.db_page_size) +
975 			OCRDMA_DB_RQ_OFFSET;
976 	}
977 }
978 
979 static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
980 {
981 	qp->wqe_wr_id_tbl =
982 	    kzalloc(sizeof(*(qp->wqe_wr_id_tbl)) * qp->sq.max_cnt,
983 		    GFP_KERNEL);
984 	if (qp->wqe_wr_id_tbl == NULL)
985 		return -ENOMEM;
986 	qp->rqe_wr_id_tbl =
987 	    kzalloc(sizeof(u64) * qp->rq.max_cnt, GFP_KERNEL);
988 	if (qp->rqe_wr_id_tbl == NULL)
989 		return -ENOMEM;
990 
991 	return 0;
992 }
993 
994 static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
995 				      struct ocrdma_pd *pd,
996 				      struct ib_qp_init_attr *attrs)
997 {
998 	qp->pd = pd;
999 	spin_lock_init(&qp->q_lock);
1000 	INIT_LIST_HEAD(&qp->sq_entry);
1001 	INIT_LIST_HEAD(&qp->rq_entry);
1002 
1003 	qp->qp_type = attrs->qp_type;
1004 	qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
1005 	qp->max_inline_data = attrs->cap.max_inline_data;
1006 	qp->sq.max_sges = attrs->cap.max_send_sge;
1007 	qp->rq.max_sges = attrs->cap.max_recv_sge;
1008 	qp->state = OCRDMA_QPS_RST;
1009 }
1010 
1011 
1012 static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1013 				   struct ib_qp_init_attr *attrs)
1014 {
1015 	if (attrs->qp_type == IB_QPT_GSI) {
1016 		dev->gsi_qp_created = 1;
1017 		dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
1018 		dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
1019 	}
1020 }
1021 
1022 struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
1023 			       struct ib_qp_init_attr *attrs,
1024 			       struct ib_udata *udata)
1025 {
1026 	int status;
1027 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1028 	struct ocrdma_qp *qp;
1029 	struct ocrdma_dev *dev = pd->dev;
1030 	struct ocrdma_create_qp_ureq ureq;
1031 	u16 dpp_credit_lmt, dpp_offset;
1032 
1033 	status = ocrdma_check_qp_params(ibpd, dev, attrs);
1034 	if (status)
1035 		goto gen_err;
1036 
1037 	memset(&ureq, 0, sizeof(ureq));
1038 	if (udata) {
1039 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1040 			return ERR_PTR(-EFAULT);
1041 	}
1042 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1043 	if (!qp) {
1044 		status = -ENOMEM;
1045 		goto gen_err;
1046 	}
1047 	qp->dev = dev;
1048 	ocrdma_set_qp_init_params(qp, pd, attrs);
1049 
1050 	mutex_lock(&dev->dev_lock);
1051 	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
1052 					ureq.dpp_cq_id,
1053 					&dpp_offset, &dpp_credit_lmt);
1054 	if (status)
1055 		goto mbx_err;
1056 
1057 	/* user-space QPs' wr_id tables are managed by the library */
1058 	if (udata == NULL) {
1059 		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
1060 				  OCRDMA_QP_FAST_REG);
1061 		status = ocrdma_alloc_wr_id_tbl(qp);
1062 		if (status)
1063 			goto map_err;
1064 	}
1065 
1066 	status = ocrdma_add_qpn_map(dev, qp);
1067 	if (status)
1068 		goto map_err;
1069 	ocrdma_set_qp_db(dev, qp, pd);
1070 	if (udata) {
1071 		status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
1072 					      dpp_credit_lmt,
1073 					      (attrs->srq != NULL));
1074 		if (status)
1075 			goto cpy_err;
1076 	}
1077 	ocrdma_store_gsi_qp_cq(dev, attrs);
1078 	qp->ibqp.qp_num = qp->id;
1079 	mutex_unlock(&dev->dev_lock);
1080 	return &qp->ibqp;
1081 
1082 cpy_err:
1083 	ocrdma_del_qpn_map(dev, qp);
1084 map_err:
1085 	ocrdma_mbx_destroy_qp(dev, qp);
1086 mbx_err:
1087 	mutex_unlock(&dev->dev_lock);
1088 	kfree(qp->wqe_wr_id_tbl);
1089 	kfree(qp->rqe_wr_id_tbl);
1090 	kfree(qp);
1091 	pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
1092 gen_err:
1093 	return ERR_PTR(status);
1094 }
1095 
1096 int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1097 		      int attr_mask)
1098 {
1099 	int status = 0;
1100 	struct ocrdma_qp *qp;
1101 	struct ocrdma_dev *dev;
1102 	enum ib_qp_state old_qps;
1103 
1104 	qp = get_ocrdma_qp(ibqp);
1105 	dev = qp->dev;
1106 	if (attr_mask & IB_QP_STATE)
1107 		status = ocrdma_qp_state_machine(qp, attr->qp_state, &old_qps);
1108 	/* if the new and previous states are the same, the hw doesn't
1109 	 * need to know about it.
1110 	 */
1111 	if (status < 0)
1112 		return status;
1113 	status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
1114 	return status;
1115 }
1116 
1117 int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1118 		     int attr_mask, struct ib_udata *udata)
1119 {
1120 	unsigned long flags;
1121 	int status = -EINVAL;
1122 	struct ocrdma_qp *qp;
1123 	struct ocrdma_dev *dev;
1124 	enum ib_qp_state old_qps, new_qps;
1125 
1126 	qp = get_ocrdma_qp(ibqp);
1127 	dev = qp->dev;
1128 
1129 	/* synchronize with multiple contexts trying to change/retrieve the qp state */
1130 	mutex_lock(&dev->dev_lock);
1131 	/* synchronize with wqe/rqe posting and cqe processing contexts */
1132 	spin_lock_irqsave(&qp->q_lock, flags);
1133 	old_qps = get_ibqp_state(qp->state);
1134 	if (attr_mask & IB_QP_STATE)
1135 		new_qps = attr->qp_state;
1136 	else
1137 		new_qps = old_qps;
1138 	spin_unlock_irqrestore(&qp->q_lock, flags);
1139 
1140 	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
1141 		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1142 		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1143 		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
1144 		       old_qps, new_qps);
1145 		goto param_err;
1146 	}
1147 
1148 	status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
1149 	if (status > 0)
1150 		status = 0;
1151 param_err:
1152 	mutex_unlock(&dev->dev_lock);
1153 	return status;
1154 }
1155 
1156 static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
1157 {
1158 	switch (mtu) {
1159 	case 256:
1160 		return IB_MTU_256;
1161 	case 512:
1162 		return IB_MTU_512;
1163 	case 1024:
1164 		return IB_MTU_1024;
1165 	case 2048:
1166 		return IB_MTU_2048;
1167 	case 4096:
1168 		return IB_MTU_4096;
1169 	default:
1170 		return IB_MTU_1024;
1171 	}
1172 }
1173 
1174 static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
1175 {
1176 	int ib_qp_acc_flags = 0;
1177 
1178 	if (qp_cap_flags & OCRDMA_QP_INB_WR)
1179 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1180 	if (qp_cap_flags & OCRDMA_QP_INB_RD)
1181 		ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1182 	return ib_qp_acc_flags;
1183 }
1184 
1185 int ocrdma_query_qp(struct ib_qp *ibqp,
1186 		    struct ib_qp_attr *qp_attr,
1187 		    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
1188 {
1189 	int status;
1190 	u32 qp_state;
1191 	struct ocrdma_qp_params params;
1192 	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1193 	struct ocrdma_dev *dev = qp->dev;
1194 
1195 	memset(&params, 0, sizeof(params));
1196 	mutex_lock(&dev->dev_lock);
1197 	status = ocrdma_mbx_query_qp(dev, qp, &params);
1198 	mutex_unlock(&dev->dev_lock);
1199 	if (status)
1200 		goto mbx_err;
1201 	qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
1202 	qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
1203 	qp_attr->path_mtu =
1204 		ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
1205 				OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
1206 				OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
1207 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
1208 	qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
1209 	qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
1210 	qp_attr->dest_qp_num =
1211 	    params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
1212 
1213 	qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
1214 	qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
1215 	qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
1216 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
1217 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
1218 	qp_attr->cap.max_inline_data = dev->attr.max_inline_data;
1219 	qp_init_attr->cap = qp_attr->cap;
1220 	memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
1221 	       sizeof(params.dgid));
1222 	qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
1223 	    OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
1224 	qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
1225 	qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
1226 					  OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
1227 						OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
1228 	qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
1229 					      OCRDMA_QP_PARAMS_SQ_PSN_MASK) >>
1230 						OCRDMA_QP_PARAMS_TCLASS_SHIFT;
1231 
1232 	qp_attr->ah_attr.ah_flags = IB_AH_GRH;
1233 	qp_attr->ah_attr.port_num = 1;
1234 	qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
1235 			       OCRDMA_QP_PARAMS_SL_MASK) >>
1236 				OCRDMA_QP_PARAMS_SL_SHIFT;
1237 	qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
1238 			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
1239 				OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
1240 	qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
1241 			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
1242 				OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
1243 	qp_attr->retry_cnt =
1244 	    (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
1245 		OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
1246 	qp_attr->min_rnr_timer = 0;
1247 	qp_attr->pkey_index = 0;
1248 	qp_attr->port_num = 1;
1249 	qp_attr->ah_attr.src_path_bits = 0;
1250 	qp_attr->ah_attr.static_rate = 0;
1251 	qp_attr->alt_pkey_index = 0;
1252 	qp_attr->alt_port_num = 0;
1253 	qp_attr->alt_timeout = 0;
1254 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
1255 	qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
1256 		    OCRDMA_QP_PARAMS_STATE_SHIFT;
1257 	qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
1258 	qp_attr->max_dest_rd_atomic =
1259 	    params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
1260 	qp_attr->max_rd_atomic =
1261 	    params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
1262 	qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
1263 				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
1264 mbx_err:
1265 	return status;
1266 }
1267 
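/* each bit tracks whether the corresponding SRQ shadow-table slot is
 * free (1) or in use (0); this helper flips the bit for a given index.
 */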
1268 static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx)
1269 {
1270 	int i = idx / 32;
1271 	unsigned int mask = (1 << (idx % 32));
1272 
1273 	if (srq->idx_bit_fields[i] & mask)
1274 		srq->idx_bit_fields[i] &= ~mask;
1275 	else
1276 		srq->idx_bit_fields[i] |= mask;
1277 }
1278 
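/* number of free entries in a circular hw queue; head is the producer
 * index and tail the consumer index.
 */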
1279 static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
1280 {
1281 	int free_cnt;
1282 	if (q->head >= q->tail)
1283 		free_cnt = (q->max_cnt - q->head) + q->tail;
1284 	else
1285 		free_cnt = q->tail - q->head;
1286 	return free_cnt;
1287 }
1288 
1289 static int is_hw_sq_empty(struct ocrdma_qp *qp)
1290 {
1291 	return (qp->sq.tail == qp->sq.head) ? 1 : 0;
1293 }
1294 
1295 static int is_hw_rq_empty(struct ocrdma_qp *qp)
1296 {
1297 	return (qp->rq.tail == qp->rq.head) ? 1 : 0;
1298 }
1299 
1300 static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
1301 {
1302 	return q->va + (q->head * q->entry_size);
1303 }
1304 
1305 static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
1306 				      u32 idx)
1307 {
1308 	return q->va + (idx * q->entry_size);
1309 }
1310 
1311 static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
1312 {
1313 	q->head = (q->head + 1) & q->max_wqe_idx;
1314 }
1315 
1316 static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
1317 {
1318 	q->tail = (q->tail + 1) & q->max_wqe_idx;
1319 }
1320 
1321 /* discard the cqes of a given QP */
1322 static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
1323 {
1324 	unsigned long cq_flags;
1325 	unsigned long flags;
1326 	int discard_cnt = 0;
1327 	u32 cur_getp, stop_getp;
1328 	struct ocrdma_cqe *cqe;
1329 	u32 qpn = 0;
1330 
1331 	spin_lock_irqsave(&cq->cq_lock, cq_flags);
1332 
1333 	/* traverse the CQEs in the hw CQ,
1334 	 * find the CQEs that match the given qp and
1335 	 * mark them discarded by clearing the qpn.
1336 	 * the doorbell is rung in poll_cq() since
1337 	 * we don't complete cqes out of order.
1338 	 */
1339 
1340 	cur_getp = cq->getp;
1341 	/* find up to where we reap the cq. */
1342 	stop_getp = cur_getp;
1343 	do {
1344 		if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
1345 			break;
1346 
1347 		cqe = cq->va + cur_getp;
1348 		/* exit if (a) we are done reaping the whole hw cq, or
1349 		 *    (b) the qp's sq/rq becomes empty.
1350 		 */
1352 		qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
1353 		/* skip previously discarded cqes (qpn == 0) and
1354 		 * cqes that belong to other qps. */
1355 		if (qpn == 0 || qpn != qp->id)
1356 			goto skip_cqe;
1357 
1358 		/* mark cqe discarded so that it is not picked up later
1359 		 * in the poll_cq().
1360 		 */
1361 		discard_cnt += 1;
1362 		cqe->cmn.qpn = 0;
1363 		if (is_cqe_for_sq(cqe))
1364 			ocrdma_hwq_inc_tail(&qp->sq);
1365 		else {
1366 			if (qp->srq) {
1367 				spin_lock_irqsave(&qp->srq->q_lock, flags);
1368 				ocrdma_hwq_inc_tail(&qp->srq->rq);
1369 				ocrdma_srq_toggle_bit(qp->srq, cur_getp);
1370 				spin_unlock_irqrestore(&qp->srq->q_lock, flags);
1371 
1372 			} else
1373 				ocrdma_hwq_inc_tail(&qp->rq);
1374 		}
1375 skip_cqe:
1376 		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
1377 	} while (cur_getp != stop_getp);
1378 	spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
1379 }
1380 
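/* remove the QP from its CQs' flush lists so that no further flushed
 * completions are reported for it.
 */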
1381 static void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
1382 {
1383 	int found = false;
1384 	unsigned long flags;
1385 	struct ocrdma_dev *dev = qp->dev;
1386 	/* sync with any active CQ poll */
1387 
1388 	spin_lock_irqsave(&dev->flush_q_lock, flags);
1389 	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
1390 	if (found)
1391 		list_del(&qp->sq_entry);
1392 	if (!qp->srq) {
1393 		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
1394 		if (found)
1395 			list_del(&qp->rq_entry);
1396 	}
1397 	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
1398 }
1399 
1400 int ocrdma_destroy_qp(struct ib_qp *ibqp)
1401 {
1402 	int status;
1403 	struct ocrdma_pd *pd;
1404 	struct ocrdma_qp *qp;
1405 	struct ocrdma_dev *dev;
1406 	struct ib_qp_attr attrs;
1407 	int attr_mask = IB_QP_STATE;
1408 	unsigned long flags;
1409 
1410 	qp = get_ocrdma_qp(ibqp);
1411 	dev = qp->dev;
1412 
1413 	attrs.qp_state = IB_QPS_ERR;
1414 	pd = qp->pd;
1415 
1416 	/* change the QP state to ERROR */
1417 	_ocrdma_modify_qp(ibqp, &attrs, attr_mask);
1418 
1419 	/* ensure that CQEs for a newly created QP (whose id may be the
1420 	 * same as that of the QP just being destroyed) don't get
1421 	 * discarded until the old CQEs are discarded.
1422 	 */
1423 	mutex_lock(&dev->dev_lock);
1424 	status = ocrdma_mbx_destroy_qp(dev, qp);
1425 
1426 	/*
1427 	 * acquire CQ lock while destroy is in progress, in order to
1428 	 * protect against processing in-flight CQEs for this QP.
1429 	 */
1430 	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
1431 	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
1432 		spin_lock(&qp->rq_cq->cq_lock);
1433 
1434 	ocrdma_del_qpn_map(dev, qp);
1435 
1436 	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
1437 		spin_unlock(&qp->rq_cq->cq_lock);
1438 	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
1439 
1440 	if (!pd->uctx) {
1441 		ocrdma_discard_cqes(qp, qp->sq_cq);
1442 		ocrdma_discard_cqes(qp, qp->rq_cq);
1443 	}
1444 	mutex_unlock(&dev->dev_lock);
1445 
1446 	if (pd->uctx) {
1447 		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len);
1448 		if (!qp->srq)
1449 			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len);
1450 	}
1451 
1452 	ocrdma_del_flush_qp(qp);
1453 
1454 	kfree(qp->wqe_wr_id_tbl);
1455 	kfree(qp->rqe_wr_id_tbl);
1456 	kfree(qp);
1457 	return status;
1458 }
1459 
1460 static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
1461 {
1462 	int status;
1463 	struct ocrdma_create_srq_uresp uresp;
1464 
1465 	memset(&uresp, 0, sizeof(uresp));
1466 	uresp.rq_dbid = srq->rq.dbid;
1467 	uresp.num_rq_pages = 1;
1468 	uresp.rq_page_addr[0] = srq->rq.pa;
1469 	uresp.rq_page_size = srq->rq.len;
1470 	uresp.db_page_addr = srq->dev->nic_info.unmapped_db +
1471 	    (srq->pd->id * srq->dev->nic_info.db_page_size);
1472 	uresp.db_page_size = srq->dev->nic_info.db_page_size;
1473 	uresp.num_rqe_allocated = srq->rq.max_cnt;
1474 	if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
1475 		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET;
1476 		uresp.db_shift = 24;
1477 	} else {
1478 		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
1479 		uresp.db_shift = 16;
1480 	}
1481 
1482 	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1483 	if (status)
1484 		return status;
1485 	status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
1486 				 uresp.rq_page_size);
1487 	if (status)
1488 		return status;
1489 	return status;
1490 }
1491 
1492 struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
1493 				 struct ib_srq_init_attr *init_attr,
1494 				 struct ib_udata *udata)
1495 {
1496 	int status = -ENOMEM;
1497 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1498 	struct ocrdma_dev *dev = pd->dev;
1499 	struct ocrdma_srq *srq;
1500 
1501 	if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
1502 		return ERR_PTR(-EINVAL);
1503 	if (init_attr->attr.max_wr > dev->attr.max_rqe)
1504 		return ERR_PTR(-EINVAL);
1505 
1506 	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1507 	if (!srq)
1508 		return ERR_PTR(status);
1509 
1510 	spin_lock_init(&srq->q_lock);
1511 	srq->dev = dev;
1512 	srq->pd = pd;
1513 	srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
1514 	status = ocrdma_mbx_create_srq(srq, init_attr, pd);
1515 	if (status)
1516 		goto err;
1517 
1518 	if (udata == NULL) {
1519 		srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
1520 			    GFP_KERNEL);
1521 		if (srq->rqe_wr_id_tbl == NULL)
1522 			goto arm_err;
1523 
1524 		srq->bit_fields_len = (srq->rq.max_cnt / 32) +
1525 		    (srq->rq.max_cnt % 32 ? 1 : 0);
1526 		srq->idx_bit_fields =
1527 		    kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL);
1528 		if (srq->idx_bit_fields == NULL)
1529 			goto arm_err;
1530 		memset(srq->idx_bit_fields, 0xff,
1531 		       srq->bit_fields_len * sizeof(u32));
1532 	}
1533 
1534 	if (init_attr->attr.srq_limit) {
1535 		status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
1536 		if (status)
1537 			goto arm_err;
1538 	}
1539 
1540 	if (udata) {
1541 		status = ocrdma_copy_srq_uresp(srq, udata);
1542 		if (status)
1543 			goto arm_err;
1544 	}
1545 
1546 	return &srq->ibsrq;
1547 
1548 arm_err:
1549 	ocrdma_mbx_destroy_srq(dev, srq);
1550 err:
1551 	kfree(srq->rqe_wr_id_tbl);
1552 	kfree(srq->idx_bit_fields);
1553 	kfree(srq);
1554 	return ERR_PTR(status);
1555 }
1556 
1557 int ocrdma_modify_srq(struct ib_srq *ibsrq,
1558 		      struct ib_srq_attr *srq_attr,
1559 		      enum ib_srq_attr_mask srq_attr_mask,
1560 		      struct ib_udata *udata)
1561 {
1562 	int status = 0;
1563 	struct ocrdma_srq *srq;
1564 
1565 	srq = get_ocrdma_srq(ibsrq);
1566 	if (srq_attr_mask & IB_SRQ_MAX_WR)
1567 		status = -EINVAL;
1568 	else
1569 		status = ocrdma_mbx_modify_srq(srq, srq_attr);
1570 	return status;
1571 }
1572 
1573 int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
1574 {
1575 	int status;
1576 	struct ocrdma_srq *srq;
1577 
1578 	srq = get_ocrdma_srq(ibsrq);
1579 	status = ocrdma_mbx_query_srq(srq, srq_attr);
1580 	return status;
1581 }
1582 
1583 int ocrdma_destroy_srq(struct ib_srq *ibsrq)
1584 {
1585 	int status;
1586 	struct ocrdma_srq *srq;
1587 	struct ocrdma_dev *dev;
1588 
1589 	srq = get_ocrdma_srq(ibsrq);
1590 	dev = srq->dev;
1591 
1592 	status = ocrdma_mbx_destroy_srq(dev, srq);
1593 
1594 	if (srq->pd->uctx)
1595 		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
1596 
1597 	kfree(srq->idx_bit_fields);
1598 	kfree(srq->rqe_wr_id_tbl);
1599 	kfree(srq);
1600 	return status;
1601 }
1602 
1603 /* unprivileged verbs and their support functions. */
1604 static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1605 				struct ocrdma_hdr_wqe *hdr,
1606 				struct ib_send_wr *wr)
1607 {
1608 	struct ocrdma_ewqe_ud_hdr *ud_hdr =
1609 		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
1610 	struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
1611 
1612 	ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
1613 	if (qp->qp_type == IB_QPT_GSI)
1614 		ud_hdr->qkey = qp->qkey;
1615 	else
1616 		ud_hdr->qkey = wr->wr.ud.remote_qkey;
1617 	ud_hdr->rsvd_ahid = ah->id;
1618 }
1619 
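/* copy the ib_sge list into the hardware SGE layout and accumulate the
 * total payload length in the WQE header; an empty sg_list results in a
 * single zeroed SGE.
 */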
1620 static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
1621 			      struct ocrdma_sge *sge, int num_sge,
1622 			      struct ib_sge *sg_list)
1623 {
1624 	int i;
1625 
1626 	for (i = 0; i < num_sge; i++) {
1627 		sge[i].lrkey = sg_list[i].lkey;
1628 		sge[i].addr_lo = sg_list[i].addr;
1629 		sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
1630 		sge[i].len = sg_list[i].length;
1631 		hdr->total_len += sg_list[i].length;
1632 	}
1633 	if (num_sge == 0)
1634 		memset(sge, 0, sizeof(*sge));
1635 }
1636 
1637 static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1638 				    struct ocrdma_hdr_wqe *hdr,
1639 				    struct ocrdma_sge *sge,
1640 				    struct ib_send_wr *wr, u32 wqe_size)
1641 {
1642 	if (wr->send_flags & IB_SEND_INLINE) {
1643 		if (wr->sg_list[0].length > qp->max_inline_data) {
1644 			pr_err("%s() supported_len=0x%x,\n"
1645 			       " unsupported len req=0x%x\n", __func__,
1646 			       qp->max_inline_data, wr->sg_list[0].length);
1647 			return -EINVAL;
1648 		}
1649 		memcpy(sge,
1650 		       (void *)(unsigned long)wr->sg_list[0].addr,
1651 		       wr->sg_list[0].length);
1652 		hdr->total_len = wr->sg_list[0].length;
1653 		wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
1654 		hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
1655 	} else {
1656 		ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1657 		if (wr->num_sge)
1658 			wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
1659 		else
1660 			wqe_size += sizeof(struct ocrdma_sge);
1661 		hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1662 	}
1663 	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1664 	return 0;
1665 }
1666 
1667 static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1668 			     struct ib_send_wr *wr)
1669 {
1670 	int status;
1671 	struct ocrdma_sge *sge;
1672 	u32 wqe_size = sizeof(*hdr);
1673 
1674 	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
1675 		ocrdma_build_ud_hdr(qp, hdr, wr);
1676 		sge = (struct ocrdma_sge *)(hdr + 2);
1677 		wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
1678 	} else
1679 		sge = (struct ocrdma_sge *)(hdr + 1);
1680 
1681 	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1682 	return status;
1683 }
1684 
1685 static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1686 			      struct ib_send_wr *wr)
1687 {
1688 	int status;
1689 	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1690 	struct ocrdma_sge *sge = ext_rw + 1;
1691 	u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
1692 
1693 	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1694 	if (status)
1695 		return status;
1696 	ext_rw->addr_lo = wr->wr.rdma.remote_addr;
1697 	ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
1698 	ext_rw->lrkey = wr->wr.rdma.rkey;
1699 	ext_rw->len = hdr->total_len;
1700 	return 0;
1701 }
1702 
1703 static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1704 			      struct ib_send_wr *wr)
1705 {
1706 	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1707 	struct ocrdma_sge *sge = ext_rw + 1;
1708 	u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
1709 	    sizeof(struct ocrdma_hdr_wqe);
1710 
1711 	ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1712 	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1713 	hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
1714 	hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1715 
1716 	ext_rw->addr_lo = wr->wr.rdma.remote_addr;
1717 	ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
1718 	ext_rw->lrkey = wr->wr.rdma.rkey;
1719 	ext_rw->len = hdr->total_len;
1720 }
1721 
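/* ring the SQ doorbell: queue id in the low word, number of WQEs posted
 * (always one here) in the high word.
 */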
1722 static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
1723 {
1724 	u32 val = qp->sq.dbid | (1 << 16);
1725 
1726 	iowrite32(val, qp->sq_db);
1727 }
1728 
1729 int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1730 		     struct ib_send_wr **bad_wr)
1731 {
1732 	int status = 0;
1733 	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1734 	struct ocrdma_hdr_wqe *hdr;
1735 	unsigned long flags;
1736 
1737 	spin_lock_irqsave(&qp->q_lock, flags);
1738 	if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
1739 		spin_unlock_irqrestore(&qp->q_lock, flags);
1740 		*bad_wr = wr;
1741 		return -EINVAL;
1742 	}
1743 
1744 	while (wr) {
1745 		if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
1746 		    wr->num_sge > qp->sq.max_sges) {
1747 			*bad_wr = wr;
1748 			status = -ENOMEM;
1749 			break;
1750 		}
1751 		hdr = ocrdma_hwq_head(&qp->sq);
1752 		hdr->cw = 0;
1753 		if (wr->send_flags & IB_SEND_SIGNALED)
1754 			hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
1755 		if (wr->send_flags & IB_SEND_FENCE)
1756 			hdr->cw |=
1757 			    (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
1758 		if (wr->send_flags & IB_SEND_SOLICITED)
1759 			hdr->cw |=
1760 			    (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
1761 		hdr->total_len = 0;
1762 		switch (wr->opcode) {
1763 		case IB_WR_SEND_WITH_IMM:
1764 			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
1765 			hdr->immdt = ntohl(wr->ex.imm_data);
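			/* fall through - SEND_WITH_IMM also builds a SEND WQE */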
1766 		case IB_WR_SEND:
1767 			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
1768 			ocrdma_build_send(qp, hdr, wr);
1769 			break;
1770 		case IB_WR_SEND_WITH_INV:
1771 			hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
1772 			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
1773 			hdr->lkey = wr->ex.invalidate_rkey;
1774 			status = ocrdma_build_send(qp, hdr, wr);
1775 			break;
1776 		case IB_WR_RDMA_WRITE_WITH_IMM:
1777 			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
1778 			hdr->immdt = ntohl(wr->ex.imm_data);
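			/* fall through */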
1779 		case IB_WR_RDMA_WRITE:
1780 			hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
1781 			status = ocrdma_build_write(qp, hdr, wr);
1782 			break;
1783 		case IB_WR_RDMA_READ_WITH_INV:
1784 			hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
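			/* fall through */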
1785 		case IB_WR_RDMA_READ:
1786 			ocrdma_build_read(qp, hdr, wr);
1787 			break;
1788 		case IB_WR_LOCAL_INV:
1789 			hdr->cw |=
1790 			    (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
1791 			hdr->cw |= (sizeof(struct ocrdma_hdr_wqe) /
1792 				OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
1793 			hdr->lkey = wr->ex.invalidate_rkey;
1794 			break;
1795 		default:
1796 			status = -EINVAL;
1797 			break;
1798 		}
1799 		if (status) {
1800 			*bad_wr = wr;
1801 			break;
1802 		}
1803 		if (wr->send_flags & IB_SEND_SIGNALED)
1804 			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
1805 		else
1806 			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
1807 		qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
1808 		ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
1809 				   OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
1810 		/* make sure wqe is written before adapter can access it */
1811 		wmb();
1812 		/* inform hw to start processing it */
1813 		ocrdma_ring_sq_db(qp);
1814 
1815 		/* update pointer, counter for next wr */
1816 		ocrdma_hwq_inc_head(&qp->sq);
1817 		wr = wr->next;
1818 	}
1819 	spin_unlock_irqrestore(&qp->q_lock, flags);
1820 	return status;
1821 }
1822 
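/* Ring the RQ doorbell: the queue id sits in the low bits and the count
 * of newly posted RQEs (one per call here) is written at a per-QP shift.
 */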
1823 static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
1824 {
1825 	u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
1826 
1827 	iowrite32(val, qp->rq_db);
1828 }
1829 
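/* Build a receive WQE: a header followed by the scatter list. The tag is
 * stored in the header; for SRQ receives it identifies the shadow-table
 * entry holding the wr_id, for ordinary RQs it is zero.
 */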
1830 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
1831 			     u16 tag)
1832 {
1833 	u32 wqe_size = 0;
	struct ocrdma_sge *sge;

	if (wr->num_sge)
1836 		wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
1837 	else
1838 		wqe_size = sizeof(*sge) + sizeof(*rqe);
1839 
1840 	rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
1841 				OCRDMA_WQE_SIZE_SHIFT);
1842 	rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
1843 	rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1844 	rqe->total_len = 0;
1845 	rqe->rsvd_tag = tag;
1846 	sge = (struct ocrdma_sge *)(rqe + 1);
1847 	ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
1848 	ocrdma_cpu_to_le32(rqe, wqe_size);
1849 }
1850 
1851 int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1852 		     struct ib_recv_wr **bad_wr)
1853 {
1854 	int status = 0;
1855 	unsigned long flags;
1856 	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1857 	struct ocrdma_hdr_wqe *rqe;
1858 
1859 	spin_lock_irqsave(&qp->q_lock, flags);
1860 	if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
1861 		spin_unlock_irqrestore(&qp->q_lock, flags);
1862 		*bad_wr = wr;
1863 		return -EINVAL;
1864 	}
1865 	while (wr) {
1866 		if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
1867 		    wr->num_sge > qp->rq.max_sges) {
1868 			*bad_wr = wr;
1869 			status = -ENOMEM;
1870 			break;
1871 		}
1872 		rqe = ocrdma_hwq_head(&qp->rq);
1873 		ocrdma_build_rqe(rqe, wr, 0);
1874 
1875 		qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
1876 		/* make sure rqe is written before adapter can access it */
1877 		wmb();
1878 
1879 		/* inform hw to start processing it */
1880 		ocrdma_ring_rq_db(qp);
1881 
1882 		/* update pointer, counter for next wr */
1883 		ocrdma_hwq_inc_head(&qp->rq);
1884 		wr = wr->next;
1885 	}
1886 	spin_unlock_irqrestore(&qp->q_lock, flags);
1887 	return status;
1888 }
1889 
/* CQEs for an SRQ's RQEs can arrive out of order. Allocate a free index
 * into the shadow table where the wr_id is stored; the same index is
 * returned as a tag in the CQE so the completion can be matched back to
 * its RQE.
 */
1895 static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
1896 {
1897 	int row = 0;
1898 	int indx = 0;
1899 
1900 	for (row = 0; row < srq->bit_fields_len; row++) {
1901 		if (srq->idx_bit_fields[row]) {
1902 			indx = ffs(srq->idx_bit_fields[row]);
1903 			indx = (row * 32) + (indx - 1);
			BUG_ON(indx >= srq->rq.max_cnt);
1906 			ocrdma_srq_toggle_bit(srq, indx);
1907 			break;
1908 		}
1909 	}
1910 
	BUG_ON(row == srq->bit_fields_len);
1913 	return indx;
1914 }
1915 
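/* Ring the SRQ doorbell at its GEN2 offset; one RQE is posted per call. */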
1916 static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
1917 {
1918 	u32 val = srq->rq.dbid | (1 << 16);
1919 
1920 	iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
1921 }
1922 
1923 int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
1924 			 struct ib_recv_wr **bad_wr)
1925 {
1926 	int status = 0;
1927 	unsigned long flags;
1928 	struct ocrdma_srq *srq;
1929 	struct ocrdma_hdr_wqe *rqe;
1930 	u16 tag;
1931 
1932 	srq = get_ocrdma_srq(ibsrq);
1933 
1934 	spin_lock_irqsave(&srq->q_lock, flags);
1935 	while (wr) {
1936 		if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
1937 		    wr->num_sge > srq->rq.max_sges) {
1938 			status = -ENOMEM;
1939 			*bad_wr = wr;
1940 			break;
1941 		}
1942 		tag = ocrdma_srq_get_idx(srq);
1943 		rqe = ocrdma_hwq_head(&srq->rq);
1944 		ocrdma_build_rqe(rqe, wr, tag);
1945 
1946 		srq->rqe_wr_id_tbl[tag] = wr->wr_id;
1947 		/* make sure rqe is written before adapter can perform DMA */
1948 		wmb();
1949 		/* inform hw to start processing it */
1950 		ocrdma_ring_srq_db(srq);
1951 		/* update pointer, counter for next wr */
1952 		ocrdma_hwq_inc_head(&srq->rq);
1953 		wr = wr->next;
1954 	}
1955 	spin_unlock_irqrestore(&srq->q_lock, flags);
1956 	return status;
1957 }
1958 
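/* Map an adapter CQE status code to the corresponding IB work completion
 * status; unknown codes are reported as a general error.
 */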
1959 static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
1960 {
1961 	enum ib_wc_status ibwc_status = IB_WC_GENERAL_ERR;
1962 
1963 	switch (status) {
1964 	case OCRDMA_CQE_GENERAL_ERR:
1965 		ibwc_status = IB_WC_GENERAL_ERR;
1966 		break;
1967 	case OCRDMA_CQE_LOC_LEN_ERR:
1968 		ibwc_status = IB_WC_LOC_LEN_ERR;
1969 		break;
1970 	case OCRDMA_CQE_LOC_QP_OP_ERR:
1971 		ibwc_status = IB_WC_LOC_QP_OP_ERR;
1972 		break;
1973 	case OCRDMA_CQE_LOC_EEC_OP_ERR:
1974 		ibwc_status = IB_WC_LOC_EEC_OP_ERR;
1975 		break;
1976 	case OCRDMA_CQE_LOC_PROT_ERR:
1977 		ibwc_status = IB_WC_LOC_PROT_ERR;
1978 		break;
1979 	case OCRDMA_CQE_WR_FLUSH_ERR:
1980 		ibwc_status = IB_WC_WR_FLUSH_ERR;
1981 		break;
1982 	case OCRDMA_CQE_MW_BIND_ERR:
1983 		ibwc_status = IB_WC_MW_BIND_ERR;
1984 		break;
1985 	case OCRDMA_CQE_BAD_RESP_ERR:
1986 		ibwc_status = IB_WC_BAD_RESP_ERR;
1987 		break;
1988 	case OCRDMA_CQE_LOC_ACCESS_ERR:
1989 		ibwc_status = IB_WC_LOC_ACCESS_ERR;
1990 		break;
1991 	case OCRDMA_CQE_REM_INV_REQ_ERR:
1992 		ibwc_status = IB_WC_REM_INV_REQ_ERR;
1993 		break;
1994 	case OCRDMA_CQE_REM_ACCESS_ERR:
1995 		ibwc_status = IB_WC_REM_ACCESS_ERR;
1996 		break;
1997 	case OCRDMA_CQE_REM_OP_ERR:
1998 		ibwc_status = IB_WC_REM_OP_ERR;
1999 		break;
2000 	case OCRDMA_CQE_RETRY_EXC_ERR:
2001 		ibwc_status = IB_WC_RETRY_EXC_ERR;
2002 		break;
2003 	case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
2004 		ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
2005 		break;
2006 	case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
2007 		ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
2008 		break;
2009 	case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
2010 		ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
2011 		break;
2012 	case OCRDMA_CQE_REM_ABORT_ERR:
2013 		ibwc_status = IB_WC_REM_ABORT_ERR;
2014 		break;
2015 	case OCRDMA_CQE_INV_EECN_ERR:
2016 		ibwc_status = IB_WC_INV_EECN_ERR;
2017 		break;
2018 	case OCRDMA_CQE_INV_EEC_STATE_ERR:
2019 		ibwc_status = IB_WC_INV_EEC_STATE_ERR;
2020 		break;
2021 	case OCRDMA_CQE_FATAL_ERR:
2022 		ibwc_status = IB_WC_FATAL_ERR;
2023 		break;
2024 	case OCRDMA_CQE_RESP_TIMEOUT_ERR:
2025 		ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
2026 		break;
2027 	default:
2028 		ibwc_status = IB_WC_GENERAL_ERR;
2029 		break;
	}
2031 	return ibwc_status;
2032 }
2033 
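/* Fill in a work completion for the SQ entry at wqe_idx: the wr_id comes
 * from the shadow table and the opcode is recovered from the WQE header,
 * which was converted to little endian when the WQE was posted.
 */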
2034 static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2035 		      u32 wqe_idx)
2036 {
2037 	struct ocrdma_hdr_wqe *hdr;
2038 	struct ocrdma_sge *rw;
2039 	int opcode;
2040 
2041 	hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
2042 
2043 	ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
2044 	/* Undo the hdr->cw swap */
2045 	opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
2046 	switch (opcode) {
2047 	case OCRDMA_WRITE:
2048 		ibwc->opcode = IB_WC_RDMA_WRITE;
2049 		break;
2050 	case OCRDMA_READ:
2051 		rw = (struct ocrdma_sge *)(hdr + 1);
2052 		ibwc->opcode = IB_WC_RDMA_READ;
2053 		ibwc->byte_len = rw->len;
2054 		break;
2055 	case OCRDMA_SEND:
2056 		ibwc->opcode = IB_WC_SEND;
2057 		break;
2058 	case OCRDMA_LKEY_INV:
2059 		ibwc->opcode = IB_WC_LOCAL_INV;
2060 		break;
2061 	default:
2062 		ibwc->status = IB_WC_GENERAL_ERR;
		pr_err("%s() invalid opcode received = 0x%x\n",
		       __func__, opcode);
2065 		break;
	}
2067 }
2068 
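/* Overwrite the status field of the CQE with WR_FLUSH_ERR so that the
 * same CQE can be reported again for each entry being flushed. UD
 * receive CQEs carry their status in a different bit range.
 */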
2069 static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
2070 						struct ocrdma_cqe *cqe)
2071 {
2072 	if (is_cqe_for_sq(cqe)) {
2073 		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2074 				cqe->flags_status_srcqpn) &
2075 					~OCRDMA_CQE_STATUS_MASK);
2076 		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2077 				cqe->flags_status_srcqpn) |
2078 				(OCRDMA_CQE_WR_FLUSH_ERR <<
2079 					OCRDMA_CQE_STATUS_SHIFT));
2080 	} else {
2081 		if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2082 			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2083 					cqe->flags_status_srcqpn) &
2084 						~OCRDMA_CQE_UD_STATUS_MASK);
2085 			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2086 					cqe->flags_status_srcqpn) |
2087 					(OCRDMA_CQE_WR_FLUSH_ERR <<
2088 						OCRDMA_CQE_UD_STATUS_SHIFT));
2089 		} else {
2090 			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2091 					cqe->flags_status_srcqpn) &
2092 						~OCRDMA_CQE_STATUS_MASK);
2093 			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2094 					cqe->flags_status_srcqpn) |
2095 					(OCRDMA_CQE_WR_FLUSH_ERR <<
2096 						OCRDMA_CQE_STATUS_SHIFT));
2097 		}
2098 	}
2099 }
2100 
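/* Common error-completion handling: report the translated status, move
 * the QP to the error state and start flushing. Returns true when the
 * CQE must be expanded, i.e. reported again for further pending entries.
 */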
2101 static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2102 				  struct ocrdma_qp *qp, int status)
2103 {
2104 	bool expand = false;
2105 
2106 	ibwc->byte_len = 0;
2107 	ibwc->qp = &qp->ibqp;
2108 	ibwc->status = ocrdma_to_ibwc_err(status);
2109 
2110 	ocrdma_flush_qp(qp);
2111 	ocrdma_qp_state_machine(qp, IB_QPS_ERR, NULL);
2112 
	/* if WQEs/RQEs are still pending for which CQEs must be returned,
	 * mark this CQE for expansion so it is reported for each of them.
	 */
2116 	if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
2117 		expand = true;
2118 		ocrdma_set_cqe_status_flushed(qp, cqe);
2119 	}
2120 	return expand;
2121 }
2122 
static bool ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2124 				  struct ocrdma_qp *qp, int status)
2125 {
2126 	ibwc->opcode = IB_WC_RECV;
2127 	ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2128 	ocrdma_hwq_inc_tail(&qp->rq);
2129 
2130 	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2131 }
2132 
static bool ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2134 				  struct ocrdma_qp *qp, int status)
2135 {
2136 	ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2137 	ocrdma_hwq_inc_tail(&qp->sq);
2138 
2139 	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2140 }
2141 
2142 
2143 static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
2144 				 struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
2145 				 bool *polled, bool *stop)
2146 {
2147 	bool expand;
2148 	int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2149 		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2150 
	/* when the hw SQ is empty but the RQ is not, keep the CQE so that
	 * the CQ event is raised again for the pending RQEs.
	 */
2154 	if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
		/* when the RQ and SQ share the same CQ, it is safe to
		 * return a flush CQE for the RQEs.
		 */
2158 		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2159 			*polled = true;
2160 			status = OCRDMA_CQE_WR_FLUSH_ERR;
2161 			expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2162 		} else {
			/* stop processing further CQEs; this CQE is kept
			 * to trigger the CQ event on the RQ's buddy CQ.
			 * When the QP is destroyed, this CQE is removed
			 * from the CQ's hardware queue.
			 */
2168 			*polled = false;
2169 			*stop = true;
2170 			expand = false;
2171 		}
2172 	} else {
2173 		*polled = true;
2174 		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2175 	}
2176 	return expand;
2177 }
2178 
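/* A successful SQ CQE may be coalesced: it can retire several unsignaled
 * WQEs ahead of the one it reports. Unsignaled entries produce no work
 * completion, and the CQE is expanded until the SQ tail catches up with
 * the index carried in the CQE.
 */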
2179 static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2180 				     struct ocrdma_cqe *cqe,
2181 				     struct ib_wc *ibwc, bool *polled)
2182 {
2183 	bool expand = false;
2184 	int tail = qp->sq.tail;
2185 	u32 wqe_idx;
2186 
2187 	if (!qp->wqe_wr_id_tbl[tail].signaled) {
2188 		*polled = false;    /* WC cannot be consumed yet */
2189 	} else {
2190 		ibwc->status = IB_WC_SUCCESS;
2191 		ibwc->wc_flags = 0;
2192 		ibwc->qp = &qp->ibqp;
2193 		ocrdma_update_wc(qp, ibwc, tail);
2194 		*polled = true;
2195 	}
	wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK;
2197 	if (tail != wqe_idx)
2198 		expand = true; /* Coalesced CQE can't be consumed yet */
2199 
2200 	ocrdma_hwq_inc_tail(&qp->sq);
2201 	return expand;
2202 }
2203 
2204 static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2205 			     struct ib_wc *ibwc, bool *polled, bool *stop)
2206 {
2207 	int status;
2208 	bool expand;
2209 
2210 	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2211 		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2212 
2213 	if (status == OCRDMA_CQE_SUCCESS)
2214 		expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
2215 	else
2216 		expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
2217 	return expand;
2218 }
2219 
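/* Decode a UD receive CQE: the source QP, pkey index and transfer length
 * are unpacked into the work completion; the UD status is returned.
 */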
2220 static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
2221 {
2222 	int status;
2223 
2224 	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2225 		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
2226 	ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
2227 						OCRDMA_CQE_SRCQP_MASK;
2228 	ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
2229 						OCRDMA_CQE_PKEY_MASK;
2230 	ibwc->wc_flags = IB_WC_GRH;
2231 	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2232 					OCRDMA_CQE_UD_XFER_LEN_SHIFT);
2233 	return status;
2234 }
2235 
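/* Look up the wr_id of an SRQ completion from the shadow table using the
 * tag carried in the CQE, then release that table index for reuse.
 */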
2236 static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
2237 				       struct ocrdma_cqe *cqe,
2238 				       struct ocrdma_qp *qp)
2239 {
2240 	unsigned long flags;
2241 	struct ocrdma_srq *srq;
2242 	u32 wqe_idx;
2243 
2244 	srq = get_ocrdma_srq(qp->ibqp.srq);
2245 	wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT;
2246 	ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
2247 	spin_lock_irqsave(&srq->q_lock, flags);
2248 	ocrdma_srq_toggle_bit(srq, wqe_idx);
2249 	spin_unlock_irqrestore(&srq->q_lock, flags);
2250 	ocrdma_hwq_inc_tail(&srq->rq);
2251 }
2252 
2253 static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2254 				struct ib_wc *ibwc, bool *polled, bool *stop,
2255 				int status)
2256 {
2257 	bool expand;
2258 
	/* when the hw RQ is empty but the SQ is not, keep the CQE so that
	 * the CQ event is raised again for the pending WQEs.
	 */
2262 	if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
2263 		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2264 			*polled = true;
2265 			status = OCRDMA_CQE_WR_FLUSH_ERR;
2266 			expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2267 		} else {
2268 			*polled = false;
2269 			*stop = true;
2270 			expand = false;
2271 		}
2272 	} else {
2273 		*polled = true;
2274 		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2275 	}
2276 	return expand;
2277 }
2278 
2279 static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
2280 				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
2281 {
2282 	ibwc->opcode = IB_WC_RECV;
2283 	ibwc->qp = &qp->ibqp;
2284 	ibwc->status = IB_WC_SUCCESS;
2285 
2286 	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2287 		ocrdma_update_ud_rcqe(ibwc, cqe);
2288 	else
2289 		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
2290 
2291 	if (is_cqe_imm(cqe)) {
2292 		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2293 		ibwc->wc_flags |= IB_WC_WITH_IMM;
2294 	} else if (is_cqe_wr_imm(cqe)) {
2295 		ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
2296 		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2297 		ibwc->wc_flags |= IB_WC_WITH_IMM;
2298 	} else if (is_cqe_invalidated(cqe)) {
2299 		ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
2300 		ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
2301 	}
	if (qp->ibqp.srq) {
		ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
	} else {
2305 		ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2306 		ocrdma_hwq_inc_tail(&qp->rq);
2307 	}
2308 }
2309 
2310 static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2311 			     struct ib_wc *ibwc, bool *polled, bool *stop)
2312 {
2313 	int status;
2314 	bool expand = false;
2315 
2316 	ibwc->wc_flags = 0;
2317 	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2318 		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2319 					OCRDMA_CQE_UD_STATUS_MASK) >>
2320 					OCRDMA_CQE_UD_STATUS_SHIFT;
2321 	else
2322 		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2323 			     OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2324 
2325 	if (status == OCRDMA_CQE_SUCCESS) {
2326 		*polled = true;
2327 		ocrdma_poll_success_rcqe(qp, cqe, ibwc);
2328 	} else {
2329 		expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
2330 					      status);
2331 	}
2332 	return expand;
2333 }
2334 
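/* Advance past a consumed CQE: on phase-change CQs the expected valid
 * phase flips when the ring wraps; otherwise the valid bit is cleared in
 * the CQE itself.
 */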
2335 static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
2336 				   u16 cur_getp)
2337 {
2338 	if (cq->phase_change) {
2339 		if (cur_getp == 0)
2340 			cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
	} else {
		/* clear valid bit */
		cqe->flags_status_srcqpn = 0;
	}
2344 }
2345 
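/* Poll up to num_entries CQEs from the hardware CQ and translate them
 * into work completions. Returns the number of ib_wc entries filled and
 * rings the CQ doorbell reporting the number of hardware CQEs consumed.
 */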
2346 static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
2347 			    struct ib_wc *ibwc)
2348 {
2349 	u16 qpn = 0;
2350 	int i = 0;
2351 	bool expand = false;
2352 	int polled_hw_cqes = 0;
2353 	struct ocrdma_qp *qp = NULL;
2354 	struct ocrdma_dev *dev = cq->dev;
2355 	struct ocrdma_cqe *cqe;
	u16 cur_getp;
	bool polled = false;
	bool stop = false;
2357 
2358 	cur_getp = cq->getp;
2359 	while (num_entries) {
2360 		cqe = cq->va + cur_getp;
		/* check whether the CQE is valid */
2362 		if (!is_cqe_valid(cq, cqe))
2363 			break;
2364 		qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
2365 		/* ignore discarded cqe */
2366 		if (qpn == 0)
2367 			goto skip_cqe;
2368 		qp = dev->qp_tbl[qpn];
2369 		BUG_ON(qp == NULL);
2370 
2371 		if (is_cqe_for_sq(cqe)) {
2372 			expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
2373 						  &stop);
2374 		} else {
2375 			expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
2376 						  &stop);
2377 		}
2378 		if (expand)
2379 			goto expand_cqe;
2380 		if (stop)
2381 			goto stop_cqe;
2382 		/* clear qpn to avoid duplicate processing by discard_cqe() */
2383 		cqe->cmn.qpn = 0;
2384 skip_cqe:
2385 		polled_hw_cqes += 1;
2386 		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
2387 		ocrdma_change_cq_phase(cq, cqe, cur_getp);
2388 expand_cqe:
2389 		if (polled) {
2390 			num_entries -= 1;
2391 			i += 1;
2392 			ibwc = ibwc + 1;
2393 			polled = false;
2394 		}
2395 	}
2396 stop_cqe:
2397 	cq->getp = cur_getp;
2398 	if (polled_hw_cqes || expand || stop) {
2399 		ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
2400 				  polled_hw_cqes);
2401 	}
2402 	return i;
2403 }
2404 
/* Insert error CQEs when the CQ being polled serves the QP's SQ or RQ. */
2406 static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
2407 			      struct ocrdma_qp *qp, struct ib_wc *ibwc)
2408 {
2409 	int err_cqes = 0;
2410 
2411 	while (num_entries) {
2412 		if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
2413 			break;
2414 		if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
2415 			ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2416 			ocrdma_hwq_inc_tail(&qp->sq);
2417 		} else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
2418 			ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2419 			ocrdma_hwq_inc_tail(&qp->rq);
		} else {
			return err_cqes;
		}
2422 		ibwc->byte_len = 0;
2423 		ibwc->status = IB_WC_WR_FLUSH_ERR;
2424 		ibwc = ibwc + 1;
2425 		err_cqes += 1;
2426 		num_entries -= 1;
2427 	}
2428 	return err_cqes;
2429 }
2430 
2431 int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
2432 {
2433 	int cqes_to_poll = num_entries;
2434 	struct ocrdma_cq *cq = NULL;
2435 	unsigned long flags;
2436 	struct ocrdma_dev *dev;
2437 	int num_os_cqe = 0, err_cqes = 0;
2438 	struct ocrdma_qp *qp;
2439 
2440 	cq = get_ocrdma_cq(ibcq);
2441 	dev = cq->dev;
2442 
2443 	/* poll cqes from adapter CQ */
2444 	spin_lock_irqsave(&cq->cq_lock, flags);
2445 	num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
2446 	spin_unlock_irqrestore(&cq->cq_lock, flags);
2447 	cqes_to_poll -= num_os_cqe;
2448 
2449 	if (cqes_to_poll) {
2450 		wc = wc + num_os_cqe;
		/* the adapter returns a single error CQE when a QP moves to
		 * the error state. Insert error CQEs with wc_status set to
		 * FLUSHED for the pending WQEs and RQEs of every QP whose
		 * SQ or RQ uses this CQ.
		 */
2456 		spin_lock_irqsave(&dev->flush_q_lock, flags);
2457 		list_for_each_entry(qp, &cq->sq_head, sq_entry) {
2458 			if (cqes_to_poll == 0)
2459 				break;
2460 			err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
2461 			cqes_to_poll -= err_cqes;
2462 			num_os_cqe += err_cqes;
2463 			wc = wc + err_cqes;
2464 		}
2465 		spin_unlock_irqrestore(&dev->flush_q_lock, flags);
2466 	}
2467 	return num_os_cqe;
2468 }
2469 
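/* Request a completion notification by arming the CQ through its
 * doorbell; solicited-only notification is honored when requested.
 */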
2470 int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
2471 {
2472 	struct ocrdma_cq *cq;
2473 	unsigned long flags;
2474 	struct ocrdma_dev *dev;
2475 	u16 cq_id;
2476 	u16 cur_getp;
2477 	struct ocrdma_cqe *cqe;
2478 
2479 	cq = get_ocrdma_cq(ibcq);
2480 	cq_id = cq->id;
2481 	dev = cq->dev;
2482 
2483 	spin_lock_irqsave(&cq->cq_lock, flags);
2484 	if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
2485 		cq->armed = true;
2486 	if (cq_flags & IB_CQ_SOLICITED)
2487 		cq->solicited = true;
2488 
2489 	cur_getp = cq->getp;
2490 	cqe = cq->va + cur_getp;
2491 
	/* check whether any valid CQE exists; if not, it is safe to arm
	 * now. If a CQE is still unconsumed, let it be consumed first and
	 * arm afterwards to avoid spurious interrupts.
	 */
2496 	if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
2497 		cq->arm_needed = false;
2498 		ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
2499 	}
2500 	spin_unlock_irqrestore(&cq->cq_lock, flags);
2501 	return 0;
2502 }
2503